From: Zhang Yi <yi.zhang@huaweicloud.com>
To: linux-ext4@vger.kernel.org
Cc: tytso@mit.edu, adilger.kernel@dilger.ca, jack@suse.cz,
yi.zhang@huawei.com, yi.zhang@huaweicloud.com,
chengzhihao1@huawei.com, yukuai3@huawei.com
Subject: [RFC PATCH 15/16] ext4: flush delalloc blocks if no free space
Date: Thu, 24 Aug 2023 17:26:18 +0800 [thread overview]
Message-ID: <20230824092619.1327976-16-yi.zhang@huaweicloud.com> (raw)
In-Reply-To: <20230824092619.1327976-1-yi.zhang@huaweicloud.com>
From: Zhang Yi <yi.zhang@huawei.com>
For delalloc, the reserved metadata block count is calculated for the
worst case, so the reservation can be larger than what is really needed,
which can cause a false positive -ENOSPC when claiming free space. So
start a worker to flush delalloc blocks in ext4_should_retry_alloc().
If s_dirtyclusters_counter is not zero, there may be some delalloc
metadata blocks that could be freed.
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/ext4/balloc.c | 47 +++++++++++++++++++++++++++++++++++++++++------
fs/ext4/ext4.h | 5 +++++
fs/ext4/super.c | 12 ++++++++++++
3 files changed, 58 insertions(+), 6 deletions(-)
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 79b20d6ae39e..e8acc21ef56d 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -667,6 +667,30 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
return -ENOSPC;
}
+void ext4_writeback_da_blocks(struct work_struct *work)
+{
+ struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
+ s_da_flush_work);
+
+ try_to_writeback_inodes_sb(sbi->s_sb, WB_REASON_FS_FREE_SPACE);
+}
+
+/*
+ * Write back delalloc blocks and try to free unused reserved extent blocks.
+ * Return 0 if there are no delalloc blocks to write back, 1 otherwise.
+ */
+static int ext4_flush_da_blocks(struct ext4_sb_info *sbi)
+{
+ if (!percpu_counter_read_positive(&sbi->s_dirtyclusters_counter) &&
+ !percpu_counter_sum(&sbi->s_dirtyclusters_counter))
+ return 0;
+
+ if (!work_busy(&sbi->s_da_flush_work))
+ queue_work(sbi->s_da_flush_wq, &sbi->s_da_flush_work);
+ flush_work(&sbi->s_da_flush_work);
+ return 1;
+}
+
/**
* ext4_should_retry_alloc() - check if a block allocation should be retried
* @sb: superblock
@@ -681,15 +705,22 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
int ext4_should_retry_alloc(struct super_block *sb, int *retries)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
-
- if (!sbi->s_journal)
- return 0;
+ int result = 0;
if (++(*retries) > 3) {
percpu_counter_inc(&sbi->s_sra_exceeded_retry_limit);
return 0;
}
+ /*
+ * Flush allocated delalloc blocks and try to free unused
+ * reserved extent blocks.
+ */
+ if (test_opt(sb, DELALLOC))
+ result += ext4_flush_da_blocks(sbi);
+
+ if (!sbi->s_journal)
+ goto out;
/*
* if there's no indication that blocks are about to be freed it's
* possible we just missed a transaction commit that did so
@@ -701,16 +732,20 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
flush_work(&sbi->s_discard_work);
atomic_dec(&sbi->s_retry_alloc_pending);
}
- return ext4_has_free_clusters(sbi, 1, 0);
+ result += ext4_has_free_clusters(sbi, 1, 0);
+ goto out;
}
/*
* it's possible we've just missed a transaction commit here,
* so ignore the returned status
*/
- ext4_debug("%s: retrying operation after ENOSPC\n", sb->s_id);
+ result += 1;
(void) jbd2_journal_force_commit_nested(sbi->s_journal);
- return 1;
+out:
+ if (result)
+ ext4_debug("%s: retrying operation after ENOSPC\n", sb->s_id);
+ return result;
}
/*
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 67b12f9ffc50..6f4259ea6751 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1627,6 +1627,10 @@ struct ext4_sb_info {
/* workqueue for reserved extent conversions (buffered io) */
struct workqueue_struct *rsv_conversion_wq;
+ /* workqueue for delalloc buffer IO flushing */
+ struct workqueue_struct *s_da_flush_wq;
+ struct work_struct s_da_flush_work;
+
/* timer for periodic error stats printing */
struct timer_list s_err_report;
@@ -2716,6 +2720,7 @@ extern int ext4_wait_block_bitmap(struct super_block *sb,
struct buffer_head *bh);
extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
ext4_group_t block_group);
+extern void ext4_writeback_da_blocks(struct work_struct *work);
extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
ext4_group_t block_group,
struct ext4_group_desc *gdp);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7bc7c8c0ed71..6f50975ba42e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1335,6 +1335,8 @@ static void ext4_put_super(struct super_block *sb)
flush_work(&sbi->s_sb_upd_work);
destroy_workqueue(sbi->rsv_conversion_wq);
+ flush_work(&sbi->s_da_flush_work);
+ destroy_workqueue(sbi->s_da_flush_wq);
ext4_release_orphan_info(sb);
if (sbi->s_journal) {
@@ -5491,6 +5493,14 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
goto failed_mount4;
}
+ INIT_WORK(&sbi->s_da_flush_work, ext4_writeback_da_blocks);
+ sbi->s_da_flush_wq = alloc_workqueue("ext4_delalloc_flush", WQ_UNBOUND, 1);
+ if (!sbi->s_da_flush_wq) {
+ printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
+ err = -ENOMEM;
+ goto failed_mount4;
+ }
+
/*
* The jbd2_journal_load will have done any necessary log recovery,
* so we can safely mount the rest of the filesystem now.
@@ -5660,6 +5670,8 @@ failed_mount9: __maybe_unused
sb->s_root = NULL;
failed_mount4:
ext4_msg(sb, KERN_ERR, "mount failed");
+ if (sbi->s_da_flush_wq)
+ destroy_workqueue(sbi->s_da_flush_wq);
if (EXT4_SB(sb)->rsv_conversion_wq)
destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
failed_mount_wq:
--
2.39.2
next prev parent reply other threads:[~2023-08-24 9:31 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-24 9:26 [RFC PATCH 00/16] ext4: more accurate metadata reservaion for delalloc mount option Zhang Yi
2023-08-24 9:26 ` [RFC PATCH 01/16] ext4: correct the start block of counting reserved clusters Zhang Yi
2023-08-30 13:10 ` Jan Kara
2023-10-06 2:33 ` Theodore Ts'o
2023-08-24 9:26 ` [RFC PATCH 02/16] ext4: make sure allocate pending entry not fail Zhang Yi
2023-08-30 13:25 ` Jan Kara
2023-10-06 2:33 ` Theodore Ts'o
2023-08-24 9:26 ` [RFC PATCH 03/16] ext4: let __revise_pending() return the number of new inserts pendings Zhang Yi
2023-08-24 9:26 ` [RFC PATCH 04/16] ext4: count removed reserved blocks for delalloc only es entry Zhang Yi
2023-08-24 9:26 ` [RFC PATCH 05/16] ext4: pass real delayed status into ext4_es_insert_extent() Zhang Yi
2023-08-24 9:26 ` [RFC PATCH 06/16] ext4: move delalloc data reserve spcae updating " Zhang Yi
2023-08-24 9:26 ` [RFC PATCH 07/16] ext4: count inode's total delalloc data blocks into ext4_es_tree Zhang Yi
2023-08-24 9:26 ` [RFC PATCH 08/16] ext4: refactor delalloc space reservation Zhang Yi
2023-08-24 9:26 ` [RFC PATCH 09/16] ext4: count reserved metadata blocks for delalloc per inode Zhang Yi
2023-08-24 9:26 ` [RFC PATCH 10/16] ext4: reserve meta blocks in ext4_da_reserve_space() Zhang Yi
2023-08-24 9:26 ` [RFC PATCH 11/16] ext4: factor out common part of ext4_da_{release|update_reserve}_space() Zhang Yi
2023-08-24 9:26 ` [RFC PATCH 12/16] ext4: update reserved meta blocks in ext4_da_{release|update_reserve}_space() Zhang Yi
2023-09-06 7:35 ` kernel test robot
2023-08-24 9:26 ` [RFC PATCH 13/16] ext4: calculate the worst extent blocks needed of a delalloc es entry Zhang Yi
2023-08-24 9:26 ` [RFC PATCH 14/16] ext4: reserve extent blocks for delalloc Zhang Yi
2023-08-24 9:26 ` Zhang Yi [this message]
2023-08-24 9:26 ` [RFC PATCH 16/16] ext4: drop ext4_nonda_switch() Zhang Yi
2023-08-30 15:30 ` [RFC PATCH 00/16] ext4: more accurate metadata reservaion for delalloc mount option Jan Kara
2023-09-01 2:33 ` Zhang Yi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230824092619.1327976-16-yi.zhang@huaweicloud.com \
--to=yi.zhang@huaweicloud.com \
--cc=adilger.kernel@dilger.ca \
--cc=chengzhihao1@huawei.com \
--cc=jack@suse.cz \
--cc=linux-ext4@vger.kernel.org \
--cc=tytso@mit.edu \
--cc=yi.zhang@huawei.com \
--cc=yukuai3@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).