linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Zhang Yi <yi.zhang@huaweicloud.com>
To: linux-ext4@vger.kernel.org
Cc: tytso@mit.edu, adilger.kernel@dilger.ca, jack@suse.cz,
	yi.zhang@huawei.com, yi.zhang@huaweicloud.com,
	chengzhihao1@huawei.com, yukuai3@huawei.com
Subject: [RFC PATCH 15/16] ext4: flush delalloc blocks if no free space
Date: Thu, 24 Aug 2023 17:26:18 +0800	[thread overview]
Message-ID: <20230824092619.1327976-16-yi.zhang@huaweicloud.com> (raw)
In-Reply-To: <20230824092619.1327976-1-yi.zhang@huaweicloud.com>

From: Zhang Yi <yi.zhang@huawei.com>

For delalloc, the reserved metadata block count is calculated for the
worst case, so the reservation can be larger than what is really needed.
This can lead to a false positive -ENOSPC when claiming free space. So
start a worker to flush delalloc blocks in ext4_should_retry_alloc().
If s_dirtyclusters_counter is not zero, there may be some delalloc
metadata blocks that could be freed.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
 fs/ext4/balloc.c | 47 +++++++++++++++++++++++++++++++++++++++++------
 fs/ext4/ext4.h   |  5 +++++
 fs/ext4/super.c  | 12 ++++++++++++
 3 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 79b20d6ae39e..e8acc21ef56d 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -667,6 +667,30 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
 		return -ENOSPC;
 }
 
+void ext4_writeback_da_blocks(struct work_struct *work)
+{
+	struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
+						s_da_flush_work);
+
+	try_to_writeback_inodes_sb(sbi->s_sb, WB_REASON_FS_FREE_SPACE);
+}
+
+/*
+ * Write back delalloc blocks and try to free unused reserved extent
+ * blocks. Return 0 if no delalloc blocks need writeback, 1 otherwise.
+ */
+static int ext4_flush_da_blocks(struct ext4_sb_info *sbi)
+{
+	if (!percpu_counter_read_positive(&sbi->s_dirtyclusters_counter) &&
+	    !percpu_counter_sum(&sbi->s_dirtyclusters_counter))
+		return 0;
+
+	if (!work_busy(&sbi->s_da_flush_work))
+		queue_work(sbi->s_da_flush_wq, &sbi->s_da_flush_work);
+	flush_work(&sbi->s_da_flush_work);
+	return 1;
+}
+
 /**
  * ext4_should_retry_alloc() - check if a block allocation should be retried
  * @sb:			superblock
@@ -681,15 +705,22 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
 int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-
-	if (!sbi->s_journal)
-		return 0;
+	int result = 0;
 
 	if (++(*retries) > 3) {
 		percpu_counter_inc(&sbi->s_sra_exceeded_retry_limit);
 		return 0;
 	}
 
+	/*
+	 * Flush allocated delalloc blocks and try to free unused
+	 * reserved extent blocks.
+	 */
+	if (test_opt(sb, DELALLOC))
+		result += ext4_flush_da_blocks(sbi);
+
+	if (!sbi->s_journal)
+		goto out;
 	/*
 	 * if there's no indication that blocks are about to be freed it's
 	 * possible we just missed a transaction commit that did so
@@ -701,16 +732,20 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 			flush_work(&sbi->s_discard_work);
 			atomic_dec(&sbi->s_retry_alloc_pending);
 		}
-		return ext4_has_free_clusters(sbi, 1, 0);
+		result += ext4_has_free_clusters(sbi, 1, 0);
+		goto out;
 	}
 
 	/*
 	 * it's possible we've just missed a transaction commit here,
 	 * so ignore the returned status
 	 */
-	ext4_debug("%s: retrying operation after ENOSPC\n", sb->s_id);
+	result += 1;
 	(void) jbd2_journal_force_commit_nested(sbi->s_journal);
-	return 1;
+out:
+	if (result)
+		ext4_debug("%s: retrying operation after ENOSPC\n", sb->s_id);
+	return result;
 }
 
 /*
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 67b12f9ffc50..6f4259ea6751 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1627,6 +1627,10 @@ struct ext4_sb_info {
 	/* workqueue for reserved extent conversions (buffered io) */
 	struct workqueue_struct *rsv_conversion_wq;
 
+	/* workqueue for delalloc buffer IO flushing */
+	struct workqueue_struct *s_da_flush_wq;
+	struct work_struct s_da_flush_work;
+
 	/* timer for periodic error stats printing */
 	struct timer_list s_err_report;
 
@@ -2716,6 +2720,7 @@ extern int ext4_wait_block_bitmap(struct super_block *sb,
 				  struct buffer_head *bh);
 extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
 						  ext4_group_t block_group);
+extern void ext4_writeback_da_blocks(struct work_struct *work);
 extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
 					      ext4_group_t block_group,
 					      struct ext4_group_desc *gdp);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7bc7c8c0ed71..6f50975ba42e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1335,6 +1335,8 @@ static void ext4_put_super(struct super_block *sb)
 
 	flush_work(&sbi->s_sb_upd_work);
 	destroy_workqueue(sbi->rsv_conversion_wq);
+	flush_work(&sbi->s_da_flush_work);
+	destroy_workqueue(sbi->s_da_flush_wq);
 	ext4_release_orphan_info(sb);
 
 	if (sbi->s_journal) {
@@ -5491,6 +5493,14 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
 		goto failed_mount4;
 	}
 
+	INIT_WORK(&sbi->s_da_flush_work, ext4_writeback_da_blocks);
+	sbi->s_da_flush_wq = alloc_workqueue("ext4_delalloc_flush", WQ_UNBOUND, 1);
+	if (!sbi->s_da_flush_wq) {
+		printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
+		err = -ENOMEM;
+		goto failed_mount4;
+	}
+
 	/*
 	 * The jbd2_journal_load will have done any necessary log recovery,
 	 * so we can safely mount the rest of the filesystem now.
@@ -5660,6 +5670,8 @@ failed_mount9: __maybe_unused
 	sb->s_root = NULL;
 failed_mount4:
 	ext4_msg(sb, KERN_ERR, "mount failed");
+	if (sbi->s_da_flush_wq)
+		destroy_workqueue(sbi->s_da_flush_wq);
 	if (EXT4_SB(sb)->rsv_conversion_wq)
 		destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
 failed_mount_wq:
-- 
2.39.2


  parent reply	other threads:[~2023-08-24  9:31 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-24  9:26 [RFC PATCH 00/16] ext4: more accurate metadata reservation for delalloc mount option Zhang Yi
2023-08-24  9:26 ` [RFC PATCH 01/16] ext4: correct the start block of counting reserved clusters Zhang Yi
2023-08-30 13:10   ` Jan Kara
2023-10-06  2:33     ` Theodore Ts'o
2023-08-24  9:26 ` [RFC PATCH 02/16] ext4: make sure allocate pending entry not fail Zhang Yi
2023-08-30 13:25   ` Jan Kara
2023-10-06  2:33     ` Theodore Ts'o
2023-08-24  9:26 ` [RFC PATCH 03/16] ext4: let __revise_pending() return the number of new inserts pendings Zhang Yi
2023-08-24  9:26 ` [RFC PATCH 04/16] ext4: count removed reserved blocks for delalloc only es entry Zhang Yi
2023-08-24  9:26 ` [RFC PATCH 05/16] ext4: pass real delayed status into ext4_es_insert_extent() Zhang Yi
2023-08-24  9:26 ` [RFC PATCH 06/16] ext4: move delalloc data reserve space updating " Zhang Yi
2023-08-24  9:26 ` [RFC PATCH 07/16] ext4: count inode's total delalloc data blocks into ext4_es_tree Zhang Yi
2023-08-24  9:26 ` [RFC PATCH 08/16] ext4: refactor delalloc space reservation Zhang Yi
2023-08-24  9:26 ` [RFC PATCH 09/16] ext4: count reserved metadata blocks for delalloc per inode Zhang Yi
2023-08-24  9:26 ` [RFC PATCH 10/16] ext4: reserve meta blocks in ext4_da_reserve_space() Zhang Yi
2023-08-24  9:26 ` [RFC PATCH 11/16] ext4: factor out common part of ext4_da_{release|update_reserve}_space() Zhang Yi
2023-08-24  9:26 ` [RFC PATCH 12/16] ext4: update reserved meta blocks in ext4_da_{release|update_reserve}_space() Zhang Yi
2023-09-06  7:35   ` kernel test robot
2023-08-24  9:26 ` [RFC PATCH 13/16] ext4: calculate the worst extent blocks needed of a delalloc es entry Zhang Yi
2023-08-24  9:26 ` [RFC PATCH 14/16] ext4: reserve extent blocks for delalloc Zhang Yi
2023-08-24  9:26 ` Zhang Yi [this message]
2023-08-24  9:26 ` [RFC PATCH 16/16] ext4: drop ext4_nonda_switch() Zhang Yi
2023-08-30 15:30 ` [RFC PATCH 00/16] ext4: more accurate metadata reservation for delalloc mount option Jan Kara
2023-09-01  2:33   ` Zhang Yi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230824092619.1327976-16-yi.zhang@huaweicloud.com \
    --to=yi.zhang@huaweicloud.com \
    --cc=adilger.kernel@dilger.ca \
    --cc=chengzhihao1@huawei.com \
    --cc=jack@suse.cz \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    --cc=yi.zhang@huawei.com \
    --cc=yukuai3@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).