* [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
@ 2026-04-14 10:02 Baolin Liu
2026-04-14 10:07 ` liubaolin
` (4 more replies)
0 siblings, 5 replies; 9+ messages in thread
From: Baolin Liu @ 2026-04-14 10:02 UTC (permalink / raw)
To: tytso, adilger.kernel
Cc: liubaolin12138, linux-ext4, linux-kernel, wangguanyu, Baolin Liu
From: Baolin Liu <liubaolin@kylinos.cn>
Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
runtime statistics. This makes it easier to inspect allocator
activity for a specific workload instead of using counters
accumulated since mount.
Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>
---
fs/ext4/ext4.h | 1 +
fs/ext4/mballoc.c | 31 +++++++++++++++++++++++++++++++
fs/ext4/sysfs.c | 24 ++++++++++++++++++++++++
3 files changed, 56 insertions(+)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 7617e2d454ea..3a32e1a515dd 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2995,6 +2995,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
extern const struct seq_operations ext4_mb_seq_groups_ops;
extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
+extern void ext4_mb_stats_clear(struct ext4_sb_info *sbi);
extern int ext4_mb_init(struct super_block *);
extern void ext4_mb_release(struct super_block *);
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bb58eafb87bc..382c91586b26 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3219,6 +3219,8 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
}
seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
+ seq_printf(seq, "\tblocks_allocated: %u\n",
+ atomic_read(&sbi->s_bal_allocated));
seq_printf(seq, "\tgroups_scanned: %u\n",
atomic_read(&sbi->s_bal_groups_scanned));
@@ -4721,6 +4723,35 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
trace_ext4_mballoc_prealloc(ac);
}
+void ext4_mb_stats_clear(struct ext4_sb_info *sbi)
+{
+ int i;
+
+ atomic_set(&sbi->s_bal_reqs, 0);
+ atomic_set(&sbi->s_bal_success, 0);
+ atomic_set(&sbi->s_bal_allocated, 0);
+ atomic_set(&sbi->s_bal_groups_scanned, 0);
+
+ for (i = 0; i < EXT4_MB_NUM_CRS; i++) {
+ atomic64_set(&sbi->s_bal_cX_hits[i], 0);
+ atomic64_set(&sbi->s_bal_cX_groups_considered[i], 0);
+ atomic_set(&sbi->s_bal_cX_ex_scanned[i], 0);
+ atomic64_set(&sbi->s_bal_cX_failed[i], 0);
+ }
+
+ atomic_set(&sbi->s_bal_ex_scanned, 0);
+ atomic_set(&sbi->s_bal_goals, 0);
+ atomic_set(&sbi->s_bal_stream_goals, 0);
+ atomic_set(&sbi->s_bal_len_goals, 0);
+ atomic_set(&sbi->s_bal_2orders, 0);
+ atomic_set(&sbi->s_bal_breaks, 0);
+ atomic_set(&sbi->s_mb_lost_chunks, 0);
+ atomic_set(&sbi->s_mb_buddies_generated, 0);
+ atomic64_set(&sbi->s_mb_generation_time, 0);
+ atomic_set(&sbi->s_mb_preallocated, 0);
+ atomic_set(&sbi->s_mb_discarded, 0);
+}
+
/*
* Called on failure; free up any blocks from the inode PA for this
* context. We don't need this for MB_GROUP_PA because we only change
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 923b375e017f..a5bd88a99f22 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -41,6 +41,7 @@ typedef enum {
attr_pointer_atomic,
attr_journal_task,
attr_err_report_sec,
+ attr_mb_stats_clear,
} attr_id_t;
typedef enum {
@@ -161,6 +162,25 @@ static ssize_t err_report_sec_store(struct ext4_sb_info *sbi,
return count;
}
+static ssize_t mb_stats_clear_store(struct ext4_sb_info *sbi,
+ const char *buf, size_t count)
+{
+ int val;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ ret = kstrtoint(skip_spaces(buf), 0, &val);
+ if (ret)
+ return ret;
+ if (val != 1)
+ return -EINVAL;
+
+ ext4_mb_stats_clear(sbi);
+ return count;
+}
+
static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf)
{
if (!sbi->s_journal)
@@ -251,6 +271,7 @@ EXT4_ATTR_OFFSET(mb_best_avail_max_trim_order, 0644, mb_order,
EXT4_ATTR_OFFSET(err_report_sec, 0644, err_report_sec, ext4_sb_info, s_err_report_sec);
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
+EXT4_ATTR(mb_stats_clear, 0200, mb_stats_clear);
EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
@@ -301,6 +322,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(inode_readahead_blks),
ATTR_LIST(inode_goal),
ATTR_LIST(mb_stats),
+ ATTR_LIST(mb_stats_clear),
ATTR_LIST(mb_max_to_scan),
ATTR_LIST(mb_min_to_scan),
ATTR_LIST(mb_order2_req),
@@ -561,6 +583,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
return trigger_test_error(sbi, buf, len);
case attr_err_report_sec:
return err_report_sec_store(sbi, buf, len);
+ case attr_mb_stats_clear:
+ return mb_stats_clear_store(sbi, buf, len);
default:
return ext4_generic_attr_store(a, sbi, buf, len);
}
--
2.51.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
2026-04-14 10:02 [PATCH v1] ext4: add mb_stats_clear for mballoc statistics Baolin Liu
@ 2026-04-14 10:07 ` liubaolin
2026-04-15 19:26 ` Ojaswin Mujoo
` (3 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: liubaolin @ 2026-04-14 10:07 UTC (permalink / raw)
To: tytso, adilger.kernel; +Cc: linux-ext4, linux-kernel, wangguanyu, Baolin Liu
> Dear all,
> I have sent a small ext4 patch to add a manual reset capability for the mballoc statistics, and I would like to add some background on the motivation.
>
> The idea came mainly from XFS stats_clear.
> ext4 already exports mballoc runtime statistics through /proc/fs/ext4/<dev>/mb_stats,
> but these counters keep accumulating from mount time, which makes it inconvenient when trying to observe allocator behavior for a single test run.
>
> This patch adds a write-only sysfs node, /sys/fs/ext4/<dev>/mb_stats_clear, so that writing 1 to it resets the ext4 mballoc runtime statistics.
> It also adds sbi->s_bal_allocated to /proc/fs/ext4/<dev>/mb_stats,
> so that the proc output matches the mballoc summary printed at unmount time and the set of counters covered by mb_stats_clear is more complete.
>
> The main goal is to make it easier to observe allocator activity for a specific test run instead of relying on counters accumulated since mount.
> With this in place, the counters can be cleared before starting a test, and the resulting mb_stats output reflects only the activity generated by that test.
>
> The counters being cleared are runtime mballoc statistics used for /proc/fs/ext4/<dev>/mb_stats reporting and for the mballoc summary printed at unmount time.
> I did not find any cases where these fields are read back to drive ext4 behavior, so the reset only affects statistics reporting.
>
> For validation, /sys/fs/ext4/<dev>/mb_stats can be enabled first,
> then a file operation test can be run so that the relevant values in /proc/fs/ext4/<dev>/mb_stats become non-zero.
> After writing 1 to /sys/fs/ext4/<dev>/mb_stats_clear, those values should return to 0.
> Running another file operation test afterward should make those values increase again.
>
> Best regards,
> Baolin Liu
在 2026/4/14 18:02, Baolin Liu 写道:
> From: Baolin Liu <liubaolin@kylinos.cn>
>
> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
> runtime statistics.This makes it easier to inspect allocator
> activity for a specific workload instead of using counters
> accumulated since mount.
>
> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>
> ---
> fs/ext4/ext4.h | 1 +
> fs/ext4/mballoc.c | 31 +++++++++++++++++++++++++++++++
> fs/ext4/sysfs.c | 24 ++++++++++++++++++++++++
> 3 files changed, 56 insertions(+)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7617e2d454ea..3a32e1a515dd 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2995,6 +2995,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
> extern const struct seq_operations ext4_mb_seq_groups_ops;
> extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
> extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
> +extern void ext4_mb_stats_clear(struct ext4_sb_info *sbi);
> extern int ext4_mb_init(struct super_block *);
> extern void ext4_mb_release(struct super_block *);
> extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index bb58eafb87bc..382c91586b26 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -3219,6 +3219,8 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
> }
> seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
> seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
> + seq_printf(seq, "\tblocks_allocated: %u\n",
> + atomic_read(&sbi->s_bal_allocated));
>
> seq_printf(seq, "\tgroups_scanned: %u\n",
> atomic_read(&sbi->s_bal_groups_scanned));
> @@ -4721,6 +4723,35 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
> trace_ext4_mballoc_prealloc(ac);
> }
>
> +void ext4_mb_stats_clear(struct ext4_sb_info *sbi)
> +{
> + int i;
> +
> + atomic_set(&sbi->s_bal_reqs, 0);
> + atomic_set(&sbi->s_bal_success, 0);
> + atomic_set(&sbi->s_bal_allocated, 0);
> + atomic_set(&sbi->s_bal_groups_scanned, 0);
> +
> + for (i = 0; i < EXT4_MB_NUM_CRS; i++) {
> + atomic64_set(&sbi->s_bal_cX_hits[i], 0);
> + atomic64_set(&sbi->s_bal_cX_groups_considered[i], 0);
> + atomic_set(&sbi->s_bal_cX_ex_scanned[i], 0);
> + atomic64_set(&sbi->s_bal_cX_failed[i], 0);
> + }
> +
> + atomic_set(&sbi->s_bal_ex_scanned, 0);
> + atomic_set(&sbi->s_bal_goals, 0);
> + atomic_set(&sbi->s_bal_stream_goals, 0);
> + atomic_set(&sbi->s_bal_len_goals, 0);
> + atomic_set(&sbi->s_bal_2orders, 0);
> + atomic_set(&sbi->s_bal_breaks, 0);
> + atomic_set(&sbi->s_mb_lost_chunks, 0);
> + atomic_set(&sbi->s_mb_buddies_generated, 0);
> + atomic64_set(&sbi->s_mb_generation_time, 0);
> + atomic_set(&sbi->s_mb_preallocated, 0);
> + atomic_set(&sbi->s_mb_discarded, 0);
> +}
> +
> /*
> * Called on failure; free up any blocks from the inode PA for this
> * context. We don't need this for MB_GROUP_PA because we only change
> diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
> index 923b375e017f..a5bd88a99f22 100644
> --- a/fs/ext4/sysfs.c
> +++ b/fs/ext4/sysfs.c
> @@ -41,6 +41,7 @@ typedef enum {
> attr_pointer_atomic,
> attr_journal_task,
> attr_err_report_sec,
> + attr_mb_stats_clear,
> } attr_id_t;
>
> typedef enum {
> @@ -161,6 +162,25 @@ static ssize_t err_report_sec_store(struct ext4_sb_info *sbi,
> return count;
> }
>
> +static ssize_t mb_stats_clear_store(struct ext4_sb_info *sbi,
> + const char *buf, size_t count)
> +{
> + int val;
> + int ret;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + ret = kstrtoint(skip_spaces(buf), 0, &val);
> + if (ret)
> + return ret;
> + if (val != 1)
> + return -EINVAL;
> +
> + ext4_mb_stats_clear(sbi);
> + return count;
> +}
> +
> static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf)
> {
> if (!sbi->s_journal)
> @@ -251,6 +271,7 @@ EXT4_ATTR_OFFSET(mb_best_avail_max_trim_order, 0644, mb_order,
> EXT4_ATTR_OFFSET(err_report_sec, 0644, err_report_sec, ext4_sb_info, s_err_report_sec);
> EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
> EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
> +EXT4_ATTR(mb_stats_clear, 0200, mb_stats_clear);
> EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
> EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
> EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
> @@ -301,6 +322,7 @@ static struct attribute *ext4_attrs[] = {
> ATTR_LIST(inode_readahead_blks),
> ATTR_LIST(inode_goal),
> ATTR_LIST(mb_stats),
> + ATTR_LIST(mb_stats_clear),
> ATTR_LIST(mb_max_to_scan),
> ATTR_LIST(mb_min_to_scan),
> ATTR_LIST(mb_order2_req),
> @@ -561,6 +583,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
> return trigger_test_error(sbi, buf, len);
> case attr_err_report_sec:
> return err_report_sec_store(sbi, buf, len);
> + case attr_mb_stats_clear:
> + return mb_stats_clear_store(sbi, buf, len);
> default:
> return ext4_generic_attr_store(a, sbi, buf, len);
> }
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
2026-04-14 10:02 [PATCH v1] ext4: add mb_stats_clear for mballoc statistics Baolin Liu
2026-04-14 10:07 ` liubaolin
@ 2026-04-15 19:26 ` Ojaswin Mujoo
2026-04-16 7:07 ` liubaolin
2026-04-16 1:14 ` Andreas Dilger
` (2 subsequent siblings)
4 siblings, 1 reply; 9+ messages in thread
From: Ojaswin Mujoo @ 2026-04-15 19:26 UTC (permalink / raw)
To: Baolin Liu
Cc: tytso, adilger.kernel, linux-ext4, linux-kernel, wangguanyu,
Baolin Liu
On Tue, Apr 14, 2026 at 06:02:11PM +0800, Baolin Liu wrote:
> From: Baolin Liu <liubaolin@kylinos.cn>
>
> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
> runtime statistics.This makes it easier to inspect allocator
> activity for a specific workload instead of using counters
> accumulated since mount.
>
> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>
The patch looks good to me, Baolin. We just need to document this knob in
the Documentation/ABI/testing/sysfs-fs-ext4 file so that users know what
it does and that the only value accepted on write is 1.
Regards,
ojaswin
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
2026-04-14 10:02 [PATCH v1] ext4: add mb_stats_clear for mballoc statistics Baolin Liu
2026-04-14 10:07 ` liubaolin
2026-04-15 19:26 ` Ojaswin Mujoo
@ 2026-04-16 1:14 ` Andreas Dilger
2026-04-16 7:11 ` liubaolin
2026-04-16 1:44 ` Ritesh Harjani
2026-04-16 8:53 ` Zhang Yi
4 siblings, 1 reply; 9+ messages in thread
From: Andreas Dilger @ 2026-04-16 1:14 UTC (permalink / raw)
To: Baolin Liu; +Cc: tytso, linux-ext4, linux-kernel, wangguanyu, Baolin Liu
On Apr 14, 2026, at 04:02, Baolin Liu <liubaolin12138@163.com> wrote:
>
> From: Baolin Liu <liubaolin@kylinos.cn>
>
> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
> runtime statistics. This makes it easier to inspect allocator
> activity for a specific workload instead of using counters
> accumulated since mount.
Rather than having a read-only "mb_stats" procfs file and a separate
write-only "mb_stats_clear" sysfs file to clear "mb_stats", IMHO it
would be more obvious to write directly to "/proc/fs/ext4/DEV/mb_stats"
file to clear it. Writing "0" would be logical to zero out the stats.
Cheers, Andreas
>
> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>
> ---
> fs/ext4/ext4.h | 1 +
> fs/ext4/mballoc.c | 31 +++++++++++++++++++++++++++++++
> fs/ext4/sysfs.c | 24 ++++++++++++++++++++++++
> 3 files changed, 56 insertions(+)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7617e2d454ea..3a32e1a515dd 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2995,6 +2995,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
> extern const struct seq_operations ext4_mb_seq_groups_ops;
> extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
> extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
> +extern void ext4_mb_stats_clear(struct ext4_sb_info *sbi);
> extern int ext4_mb_init(struct super_block *);
> extern void ext4_mb_release(struct super_block *);
> extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index bb58eafb87bc..382c91586b26 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -3219,6 +3219,8 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
> }
> seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
> seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
> + seq_printf(seq, "\tblocks_allocated: %u\n",
> + atomic_read(&sbi->s_bal_allocated));
>
> seq_printf(seq, "\tgroups_scanned: %u\n",
> atomic_read(&sbi->s_bal_groups_scanned));
> @@ -4721,6 +4723,35 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
> trace_ext4_mballoc_prealloc(ac);
> }
>
> +void ext4_mb_stats_clear(struct ext4_sb_info *sbi)
> +{
> + int i;
> +
> + atomic_set(&sbi->s_bal_reqs, 0);
> + atomic_set(&sbi->s_bal_success, 0);
> + atomic_set(&sbi->s_bal_allocated, 0);
> + atomic_set(&sbi->s_bal_groups_scanned, 0);
> +
> + for (i = 0; i < EXT4_MB_NUM_CRS; i++) {
> + atomic64_set(&sbi->s_bal_cX_hits[i], 0);
> + atomic64_set(&sbi->s_bal_cX_groups_considered[i], 0);
> + atomic_set(&sbi->s_bal_cX_ex_scanned[i], 0);
> + atomic64_set(&sbi->s_bal_cX_failed[i], 0);
> + }
> +
> + atomic_set(&sbi->s_bal_ex_scanned, 0);
> + atomic_set(&sbi->s_bal_goals, 0);
> + atomic_set(&sbi->s_bal_stream_goals, 0);
> + atomic_set(&sbi->s_bal_len_goals, 0);
> + atomic_set(&sbi->s_bal_2orders, 0);
> + atomic_set(&sbi->s_bal_breaks, 0);
> + atomic_set(&sbi->s_mb_lost_chunks, 0);
> + atomic_set(&sbi->s_mb_buddies_generated, 0);
> + atomic64_set(&sbi->s_mb_generation_time, 0);
> + atomic_set(&sbi->s_mb_preallocated, 0);
> + atomic_set(&sbi->s_mb_discarded, 0);
> +}
> +
> /*
> * Called on failure; free up any blocks from the inode PA for this
> * context. We don't need this for MB_GROUP_PA because we only change
> diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
> index 923b375e017f..a5bd88a99f22 100644
> --- a/fs/ext4/sysfs.c
> +++ b/fs/ext4/sysfs.c
> @@ -41,6 +41,7 @@ typedef enum {
> attr_pointer_atomic,
> attr_journal_task,
> attr_err_report_sec,
> + attr_mb_stats_clear,
> } attr_id_t;
>
> typedef enum {
> @@ -161,6 +162,25 @@ static ssize_t err_report_sec_store(struct ext4_sb_info *sbi,
> return count;
> }
>
> +static ssize_t mb_stats_clear_store(struct ext4_sb_info *sbi,
> + const char *buf, size_t count)
> +{
> + int val;
> + int ret;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + ret = kstrtoint(skip_spaces(buf), 0, &val);
> + if (ret)
> + return ret;
> + if (val != 1)
> + return -EINVAL;
> +
> + ext4_mb_stats_clear(sbi);
> + return count;
> +}
> +
> static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf)
> {
> if (!sbi->s_journal)
> @@ -251,6 +271,7 @@ EXT4_ATTR_OFFSET(mb_best_avail_max_trim_order, 0644, mb_order,
> EXT4_ATTR_OFFSET(err_report_sec, 0644, err_report_sec, ext4_sb_info, s_err_report_sec);
> EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
> EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
> +EXT4_ATTR(mb_stats_clear, 0200, mb_stats_clear);
> EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
> EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
> EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
> @@ -301,6 +322,7 @@ static struct attribute *ext4_attrs[] = {
> ATTR_LIST(inode_readahead_blks),
> ATTR_LIST(inode_goal),
> ATTR_LIST(mb_stats),
> + ATTR_LIST(mb_stats_clear),
> ATTR_LIST(mb_max_to_scan),
> ATTR_LIST(mb_min_to_scan),
> ATTR_LIST(mb_order2_req),
> @@ -561,6 +583,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
> return trigger_test_error(sbi, buf, len);
> case attr_err_report_sec:
> return err_report_sec_store(sbi, buf, len);
> + case attr_mb_stats_clear:
> + return mb_stats_clear_store(sbi, buf, len);
> default:
> return ext4_generic_attr_store(a, sbi, buf, len);
> }
> --
> 2.51.0
>
Cheers, Andreas
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
2026-04-14 10:02 [PATCH v1] ext4: add mb_stats_clear for mballoc statistics Baolin Liu
` (2 preceding siblings ...)
2026-04-16 1:14 ` Andreas Dilger
@ 2026-04-16 1:44 ` Ritesh Harjani
2026-04-16 7:16 ` liubaolin
2026-04-16 8:53 ` Zhang Yi
4 siblings, 1 reply; 9+ messages in thread
From: Ritesh Harjani @ 2026-04-16 1:44 UTC (permalink / raw)
To: Baolin Liu, tytso, adilger.kernel
Cc: liubaolin12138, linux-ext4, linux-kernel, wangguanyu, Baolin Liu
Baolin Liu <liubaolin12138@163.com> writes:
> From: Baolin Liu <liubaolin@kylinos.cn>
>
> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
> runtime statistics.This makes it easier to inspect allocator
> activity for a specific workload instead of using counters
> accumulated since mount.
>
> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>
Makes sense to me. The changes look good and work fine on my end.
So please feel free to add:
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
2026-04-15 19:26 ` Ojaswin Mujoo
@ 2026-04-16 7:07 ` liubaolin
0 siblings, 0 replies; 9+ messages in thread
From: liubaolin @ 2026-04-16 7:07 UTC (permalink / raw)
To: Ojaswin Mujoo
Cc: tytso, adilger.kernel, linux-ext4, linux-kernel, wangguanyu,
Baolin Liu
> Dear ojaswin,
> Alright, thank you for your review. I will revise the patch according to Andreas's comments and submit a second version.
> When submitting the second version, I will also update the relevant files under the Documentation directory according to your suggestions to add explanations.
>
> Regards,
> Baolin
在 2026/4/16 3:26, Ojaswin Mujoo 写道:
> On Tue, Apr 14, 2026 at 06:02:11PM +0800, Baolin Liu wrote:
>> From: Baolin Liu <liubaolin@kylinos.cn>
>>
>> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
>> runtime statistics.This makes it easier to inspect allocator
>> activity for a specific workload instead of using counters
>> accumulated since mount.
>>
>> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>
>
> The patch looks good to me Baolin. We just need to add documentation of
> this to the Documentation/ABI/testing/sysfs-fs-ext4 file so that the
> users know what it is and the fact that the only value we allow to write
> is 1.
>
> Regards,
> ojaswin
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
2026-04-16 1:14 ` Andreas Dilger
@ 2026-04-16 7:11 ` liubaolin
0 siblings, 0 replies; 9+ messages in thread
From: liubaolin @ 2026-04-16 7:11 UTC (permalink / raw)
To: Andreas Dilger; +Cc: tytso, linux-ext4, linux-kernel, wangguanyu, Baolin Liu
> Dear Andreas,
> Alright, thank you for your review.
> I will revise the patch according to your suggestions and submit a second version as soon as possible.
>
> Regards,
> Baolin
在 2026/4/16 9:14, Andreas Dilger 写道:
> On Apr 14, 2026, at 04:02, Baolin Liu <liubaolin12138@163.com> wrote:
>>
>> From: Baolin Liu <liubaolin@kylinos.cn>
>>
>> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
>> runtime statistics. This makes it easier to inspect allocator
>> activity for a specific workload instead of using counters
>> accumulated since mount.
>
> Rather than having a read-only "mb_stats" procfs file and a separate
> write-only "mb_stats_clear" sysfs file to clear "mb_stats", IMHO it
> would be more obvious to write directly to "/proc/fs/ext4/DEV/mb_stats"
> file to clear it. Writing "0" would be logical to zero out the stats.
>
> Cheers, Andreas
>
>>
>> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>
>> ---
>> fs/ext4/ext4.h | 1 +
>> fs/ext4/mballoc.c | 31 +++++++++++++++++++++++++++++++
>> fs/ext4/sysfs.c | 24 ++++++++++++++++++++++++
>> 3 files changed, 56 insertions(+)
>>
>> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
>> index 7617e2d454ea..3a32e1a515dd 100644
>> --- a/fs/ext4/ext4.h
>> +++ b/fs/ext4/ext4.h
>> @@ -2995,6 +2995,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
>> extern const struct seq_operations ext4_mb_seq_groups_ops;
>> extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
>> extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
>> +extern void ext4_mb_stats_clear(struct ext4_sb_info *sbi);
>> extern int ext4_mb_init(struct super_block *);
>> extern void ext4_mb_release(struct super_block *);
>> extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>> index bb58eafb87bc..382c91586b26 100644
>> --- a/fs/ext4/mballoc.c
>> +++ b/fs/ext4/mballoc.c
>> @@ -3219,6 +3219,8 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
>> }
>> seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
>> seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
>> + seq_printf(seq, "\tblocks_allocated: %u\n",
>> + atomic_read(&sbi->s_bal_allocated));
>>
>> seq_printf(seq, "\tgroups_scanned: %u\n",
>> atomic_read(&sbi->s_bal_groups_scanned));
>> @@ -4721,6 +4723,35 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
>> trace_ext4_mballoc_prealloc(ac);
>> }
>>
>> +void ext4_mb_stats_clear(struct ext4_sb_info *sbi)
>> +{
>> + int i;
>> +
>> + atomic_set(&sbi->s_bal_reqs, 0);
>> + atomic_set(&sbi->s_bal_success, 0);
>> + atomic_set(&sbi->s_bal_allocated, 0);
>> + atomic_set(&sbi->s_bal_groups_scanned, 0);
>> +
>> + for (i = 0; i < EXT4_MB_NUM_CRS; i++) {
>> + atomic64_set(&sbi->s_bal_cX_hits[i], 0);
>> + atomic64_set(&sbi->s_bal_cX_groups_considered[i], 0);
>> + atomic_set(&sbi->s_bal_cX_ex_scanned[i], 0);
>> + atomic64_set(&sbi->s_bal_cX_failed[i], 0);
>> + }
>> +
>> + atomic_set(&sbi->s_bal_ex_scanned, 0);
>> + atomic_set(&sbi->s_bal_goals, 0);
>> + atomic_set(&sbi->s_bal_stream_goals, 0);
>> + atomic_set(&sbi->s_bal_len_goals, 0);
>> + atomic_set(&sbi->s_bal_2orders, 0);
>> + atomic_set(&sbi->s_bal_breaks, 0);
>> + atomic_set(&sbi->s_mb_lost_chunks, 0);
>> + atomic_set(&sbi->s_mb_buddies_generated, 0);
>> + atomic64_set(&sbi->s_mb_generation_time, 0);
>> + atomic_set(&sbi->s_mb_preallocated, 0);
>> + atomic_set(&sbi->s_mb_discarded, 0);
>> +}
>> +
>> /*
>> * Called on failure; free up any blocks from the inode PA for this
>> * context. We don't need this for MB_GROUP_PA because we only change
>> diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
>> index 923b375e017f..a5bd88a99f22 100644
>> --- a/fs/ext4/sysfs.c
>> +++ b/fs/ext4/sysfs.c
>> @@ -41,6 +41,7 @@ typedef enum {
>> attr_pointer_atomic,
>> attr_journal_task,
>> attr_err_report_sec,
>> + attr_mb_stats_clear,
>> } attr_id_t;
>>
>> typedef enum {
>> @@ -161,6 +162,25 @@ static ssize_t err_report_sec_store(struct ext4_sb_info *sbi,
>> return count;
>> }
>>
>> +static ssize_t mb_stats_clear_store(struct ext4_sb_info *sbi,
>> + const char *buf, size_t count)
>> +{
>> + int val;
>> + int ret;
>> +
>> + if (!capable(CAP_SYS_ADMIN))
>> + return -EPERM;
>> +
>> + ret = kstrtoint(skip_spaces(buf), 0, &val);
>> + if (ret)
>> + return ret;
>> + if (val != 1)
>> + return -EINVAL;
>> +
>> + ext4_mb_stats_clear(sbi);
>> + return count;
>> +}
>> +
>> static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf)
>> {
>> if (!sbi->s_journal)
>> @@ -251,6 +271,7 @@ EXT4_ATTR_OFFSET(mb_best_avail_max_trim_order, 0644, mb_order,
>> EXT4_ATTR_OFFSET(err_report_sec, 0644, err_report_sec, ext4_sb_info, s_err_report_sec);
>> EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
>> EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
>> +EXT4_ATTR(mb_stats_clear, 0200, mb_stats_clear);
>> EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
>> EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
>> EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
>> @@ -301,6 +322,7 @@ static struct attribute *ext4_attrs[] = {
>> ATTR_LIST(inode_readahead_blks),
>> ATTR_LIST(inode_goal),
>> ATTR_LIST(mb_stats),
>> + ATTR_LIST(mb_stats_clear),
>> ATTR_LIST(mb_max_to_scan),
>> ATTR_LIST(mb_min_to_scan),
>> ATTR_LIST(mb_order2_req),
>> @@ -561,6 +583,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
>> return trigger_test_error(sbi, buf, len);
>> case attr_err_report_sec:
>> return err_report_sec_store(sbi, buf, len);
>> + case attr_mb_stats_clear:
>> + return mb_stats_clear_store(sbi, buf, len);
>> default:
>> return ext4_generic_attr_store(a, sbi, buf, len);
>> }
>> --
>> 2.51.0
>>
>
>
> Cheers, Andreas
>
>
>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
2026-04-16 1:44 ` Ritesh Harjani
@ 2026-04-16 7:16 ` liubaolin
0 siblings, 0 replies; 9+ messages in thread
From: liubaolin @ 2026-04-16 7:16 UTC (permalink / raw)
To: Ritesh Harjani (IBM), tytso, adilger.kernel
Cc: linux-ext4, linux-kernel, wangguanyu, Baolin Liu
> Dear Ritesh,
> Thank you for your review. I will revise the patch according to Andreas's comments and submit a second version.
> When the second version is submitted, you are welcome to review it.
> Once the patch is finalized, the maintainer will add the Reviewed-by tag.
>
> Regards,
> Baolin
在 2026/4/16 9:44, Ritesh Harjani (IBM) 写道:
> Baolin Liu <liubaolin12138@163.com> writes:
>
>> From: Baolin Liu <liubaolin@kylinos.cn>
>>
>> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
>> runtime statistics.This makes it easier to inspect allocator
>> activity for a specific workload instead of using counters
>> accumulated since mount.
>>
>> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>
>
> Make sense to me. The changes looks good and works fine at my end.
> So please feel free to add:
>
> Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
2026-04-14 10:02 [PATCH v1] ext4: add mb_stats_clear for mballoc statistics Baolin Liu
` (3 preceding siblings ...)
2026-04-16 1:44 ` Ritesh Harjani
@ 2026-04-16 8:53 ` Zhang Yi
4 siblings, 0 replies; 9+ messages in thread
From: Zhang Yi @ 2026-04-16 8:53 UTC (permalink / raw)
To: Baolin Liu, tytso, adilger.kernel
Cc: linux-ext4, linux-kernel, wangguanyu, Baolin Liu
On 4/14/2026 6:02 PM, Baolin Liu wrote:
> From: Baolin Liu <liubaolin@kylinos.cn>
>
> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
> runtime statistics.This makes it easier to inspect allocator
> activity for a specific workload instead of using counters
> accumulated since mount.
>
> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>
Looks good to me!
Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
> ---
> fs/ext4/ext4.h | 1 +
> fs/ext4/mballoc.c | 31 +++++++++++++++++++++++++++++++
> fs/ext4/sysfs.c | 24 ++++++++++++++++++++++++
> 3 files changed, 56 insertions(+)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7617e2d454ea..3a32e1a515dd 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2995,6 +2995,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
> extern const struct seq_operations ext4_mb_seq_groups_ops;
> extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
> extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
> +extern void ext4_mb_stats_clear(struct ext4_sb_info *sbi);
> extern int ext4_mb_init(struct super_block *);
> extern void ext4_mb_release(struct super_block *);
> extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index bb58eafb87bc..382c91586b26 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -3219,6 +3219,8 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
> }
> seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
> seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
> + seq_printf(seq, "\tblocks_allocated: %u\n",
> + atomic_read(&sbi->s_bal_allocated));
>
> seq_printf(seq, "\tgroups_scanned: %u\n",
> atomic_read(&sbi->s_bal_groups_scanned));
> @@ -4721,6 +4723,35 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
> trace_ext4_mballoc_prealloc(ac);
> }
>
> +void ext4_mb_stats_clear(struct ext4_sb_info *sbi)
> +{
> + int i;
> +
> + atomic_set(&sbi->s_bal_reqs, 0);
> + atomic_set(&sbi->s_bal_success, 0);
> + atomic_set(&sbi->s_bal_allocated, 0);
> + atomic_set(&sbi->s_bal_groups_scanned, 0);
> +
> + for (i = 0; i < EXT4_MB_NUM_CRS; i++) {
> + atomic64_set(&sbi->s_bal_cX_hits[i], 0);
> + atomic64_set(&sbi->s_bal_cX_groups_considered[i], 0);
> + atomic_set(&sbi->s_bal_cX_ex_scanned[i], 0);
> + atomic64_set(&sbi->s_bal_cX_failed[i], 0);
> + }
> +
> + atomic_set(&sbi->s_bal_ex_scanned, 0);
> + atomic_set(&sbi->s_bal_goals, 0);
> + atomic_set(&sbi->s_bal_stream_goals, 0);
> + atomic_set(&sbi->s_bal_len_goals, 0);
> + atomic_set(&sbi->s_bal_2orders, 0);
> + atomic_set(&sbi->s_bal_breaks, 0);
> + atomic_set(&sbi->s_mb_lost_chunks, 0);
> + atomic_set(&sbi->s_mb_buddies_generated, 0);
> + atomic64_set(&sbi->s_mb_generation_time, 0);
> + atomic_set(&sbi->s_mb_preallocated, 0);
> + atomic_set(&sbi->s_mb_discarded, 0);
> +}
> +
> /*
> * Called on failure; free up any blocks from the inode PA for this
> * context. We don't need this for MB_GROUP_PA because we only change
> diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
> index 923b375e017f..a5bd88a99f22 100644
> --- a/fs/ext4/sysfs.c
> +++ b/fs/ext4/sysfs.c
> @@ -41,6 +41,7 @@ typedef enum {
> attr_pointer_atomic,
> attr_journal_task,
> attr_err_report_sec,
> + attr_mb_stats_clear,
> } attr_id_t;
>
> typedef enum {
> @@ -161,6 +162,25 @@ static ssize_t err_report_sec_store(struct ext4_sb_info *sbi,
> return count;
> }
>
> +static ssize_t mb_stats_clear_store(struct ext4_sb_info *sbi,
> + const char *buf, size_t count)
> +{
> + int val;
> + int ret;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + ret = kstrtoint(skip_spaces(buf), 0, &val);
> + if (ret)
> + return ret;
> + if (val != 1)
> + return -EINVAL;
> +
> + ext4_mb_stats_clear(sbi);
> + return count;
> +}
> +
> static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf)
> {
> if (!sbi->s_journal)
> @@ -251,6 +271,7 @@ EXT4_ATTR_OFFSET(mb_best_avail_max_trim_order, 0644, mb_order,
> EXT4_ATTR_OFFSET(err_report_sec, 0644, err_report_sec, ext4_sb_info, s_err_report_sec);
> EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
> EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
> +EXT4_ATTR(mb_stats_clear, 0200, mb_stats_clear);
> EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
> EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
> EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
> @@ -301,6 +322,7 @@ static struct attribute *ext4_attrs[] = {
> ATTR_LIST(inode_readahead_blks),
> ATTR_LIST(inode_goal),
> ATTR_LIST(mb_stats),
> + ATTR_LIST(mb_stats_clear),
> ATTR_LIST(mb_max_to_scan),
> ATTR_LIST(mb_min_to_scan),
> ATTR_LIST(mb_order2_req),
> @@ -561,6 +583,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
> return trigger_test_error(sbi, buf, len);
> case attr_err_report_sec:
> return err_report_sec_store(sbi, buf, len);
> + case attr_mb_stats_clear:
> + return mb_stats_clear_store(sbi, buf, len);
> default:
> return ext4_generic_attr_store(a, sbi, buf, len);
> }
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2026-04-16 8:53 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-14 10:02 [PATCH v1] ext4: add mb_stats_clear for mballoc statistics Baolin Liu
2026-04-14 10:07 ` liubaolin
2026-04-15 19:26 ` Ojaswin Mujoo
2026-04-16 7:07 ` liubaolin
2026-04-16 1:14 ` Andreas Dilger
2026-04-16 7:11 ` liubaolin
2026-04-16 1:44 ` Ritesh Harjani
2026-04-16 7:16 ` liubaolin
2026-04-16 8:53 ` Zhang Yi
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox