* [PATCH 01/10] ext4: factor out ext4_map_create_blocks() to allocate new blocks
2024-06-01 3:41 [PATCH 00/10] ext4: simplify the counting and management of delalloc reserved blocks Zhang Yi
@ 2024-06-01 3:41 ` Zhang Yi
2024-06-01 3:41 ` [PATCH 02/10] ext4: optimize the EXT4_GET_BLOCKS_DELALLOC_RESERVE flag set Zhang Yi
` (8 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Zhang Yi @ 2024-06-01 3:41 UTC (permalink / raw)
To: linux-ext4
Cc: linux-fsdevel, linux-kernel, tytso, adilger.kernel, jack,
ritesh.list, yi.zhang, yi.zhang, chengzhihao1, yukuai3
From: Zhang Yi <yi.zhang@huawei.com>
Factor out a common helper ext4_map_create_blocks() from
ext4_map_blocks() to do a real blocks allocation, no logic changes.
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/ext4/inode.c | 157 +++++++++++++++++++++++++-----------------------
1 file changed, 81 insertions(+), 76 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2afa3f83ddfb..01a30483e521 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -482,6 +482,86 @@ static int ext4_map_query_blocks(handle_t *handle, struct inode *inode,
return retval;
}
+static int ext4_map_create_blocks(handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map, int flags)
+{
+ struct extent_status es;
+ unsigned int status;
+ int err, retval = 0;
+
+ /*
+ * Here we clear m_flags because after allocating an new extent,
+ * it will be set again.
+ */
+ map->m_flags &= ~EXT4_MAP_FLAGS;
+
+ /*
+ * We need to check for EXT4 here because migrate could have
+ * changed the inode type in between.
+ */
+ if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+ retval = ext4_ext_map_blocks(handle, inode, map, flags);
+ } else {
+ retval = ext4_ind_map_blocks(handle, inode, map, flags);
+
+ /*
+ * We allocated new blocks which will result in i_data's
+ * format changing. Force the migrate to fail by clearing
+ * migrate flags.
+ */
+ if (retval > 0 && map->m_flags & EXT4_MAP_NEW)
+ ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
+ }
+ if (retval <= 0)
+ return retval;
+
+ if (unlikely(retval != map->m_len)) {
+ ext4_warning(inode->i_sb,
+ "ES len assertion failed for inode %lu: "
+ "retval %d != map->m_len %d",
+ inode->i_ino, retval, map->m_len);
+ WARN_ON(1);
+ }
+
+ /*
+ * We have to zeroout blocks before inserting them into extent
+ * status tree. Otherwise someone could look them up there and
+ * use them before they are really zeroed. We also have to
+ * unmap metadata before zeroing as otherwise writeback can
+ * overwrite zeros with stale data from block device.
+ */
+ if (flags & EXT4_GET_BLOCKS_ZERO &&
+ map->m_flags & EXT4_MAP_MAPPED && map->m_flags & EXT4_MAP_NEW) {
+ err = ext4_issue_zeroout(inode, map->m_lblk, map->m_pblk,
+ map->m_len);
+ if (err)
+ return err;
+ }
+
+ /*
+ * If the extent has been zeroed out, we don't need to update
+ * extent status tree.
+ */
+ if (flags & EXT4_GET_BLOCKS_PRE_IO &&
+ ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
+ if (ext4_es_is_written(&es))
+ return retval;
+ }
+
+ status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+ EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+ if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
+ !(status & EXTENT_STATUS_WRITTEN) &&
+ ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
+ map->m_lblk + map->m_len - 1))
+ status |= EXTENT_STATUS_DELAYED;
+
+ ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+ map->m_pblk, status);
+
+ return retval;
+}
+
/*
* The ext4_map_blocks() function tries to look up the requested blocks,
* and returns if the blocks are already mapped.
@@ -630,12 +710,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN))
return retval;
- /*
- * Here we clear m_flags because after allocating an new extent,
- * it will be set again.
- */
- map->m_flags &= ~EXT4_MAP_FLAGS;
-
/*
* New blocks allocate and/or writing to unwritten extent
* will possibly result in updating i_data, so we take
@@ -643,76 +717,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
* with create == 1 flag.
*/
down_write(&EXT4_I(inode)->i_data_sem);
-
- /*
- * We need to check for EXT4 here because migrate
- * could have changed the inode type in between
- */
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
- retval = ext4_ext_map_blocks(handle, inode, map, flags);
- } else {
- retval = ext4_ind_map_blocks(handle, inode, map, flags);
-
- if (retval > 0 && map->m_flags & EXT4_MAP_NEW) {
- /*
- * We allocated new blocks which will result in
- * i_data's format changing. Force the migrate
- * to fail by clearing migrate flags
- */
- ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
- }
- }
-
- if (retval > 0) {
- unsigned int status;
-
- if (unlikely(retval != map->m_len)) {
- ext4_warning(inode->i_sb,
- "ES len assertion failed for inode "
- "%lu: retval %d != map->m_len %d",
- inode->i_ino, retval, map->m_len);
- WARN_ON(1);
- }
-
- /*
- * We have to zeroout blocks before inserting them into extent
- * status tree. Otherwise someone could look them up there and
- * use them before they are really zeroed. We also have to
- * unmap metadata before zeroing as otherwise writeback can
- * overwrite zeros with stale data from block device.
- */
- if (flags & EXT4_GET_BLOCKS_ZERO &&
- map->m_flags & EXT4_MAP_MAPPED &&
- map->m_flags & EXT4_MAP_NEW) {
- ret = ext4_issue_zeroout(inode, map->m_lblk,
- map->m_pblk, map->m_len);
- if (ret) {
- retval = ret;
- goto out_sem;
- }
- }
-
- /*
- * If the extent has been zeroed out, we don't need to update
- * extent status tree.
- */
- if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
- ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
- if (ext4_es_is_written(&es))
- goto out_sem;
- }
- status = map->m_flags & EXT4_MAP_UNWRITTEN ?
- EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
- if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
- !(status & EXTENT_STATUS_WRITTEN) &&
- ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
- map->m_lblk + map->m_len - 1))
- status |= EXTENT_STATUS_DELAYED;
- ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
- map->m_pblk, status);
- }
-
-out_sem:
+ retval = ext4_map_create_blocks(handle, inode, map, flags);
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
ret = check_block_validity(inode, map);
--
2.31.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 02/10] ext4: optimize the EXT4_GET_BLOCKS_DELALLOC_RESERVE flag set
2024-06-01 3:41 [PATCH 00/10] ext4: simplify the counting and management of delalloc reserved blocks Zhang Yi
2024-06-01 3:41 ` [PATCH 01/10] ext4: factor out ext4_map_create_blocks() to allocate new blocks Zhang Yi
@ 2024-06-01 3:41 ` Zhang Yi
2024-06-01 3:41 ` [PATCH 03/10] ext4: don't set EXTENT_STATUS_DELAYED on allocated blocks Zhang Yi
` (7 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Zhang Yi @ 2024-06-01 3:41 UTC (permalink / raw)
To: linux-ext4
Cc: linux-fsdevel, linux-kernel, tytso, adilger.kernel, jack,
ritesh.list, yi.zhang, yi.zhang, chengzhihao1, yukuai3
From: Zhang Yi <yi.zhang@huawei.com>
When doing block allocation, magic EXT4_GET_BLOCKS_DELALLOC_RESERVE
means the allocating range covers a range of delayed allocated clusters,
the blocks and quotas have already been reserved in ext4_da_map_blocks(),
we should update the reserved space and don't need to claim them again.
At the moment, we only set this magic in mpage_map_one_extent() when
allocating a range of delayed allocated clusters in the write back path,
it makes things complicated since we have to notice and deal with the
case of allocating non-delayed allocated clusters separately in
ext4_ext_map_blocks(). For example, it we fallocate some blocks that
have been delayed allocated, free space would be claimed again in
ext4_mb_new_blocks() (this is wrong exactily), and we can't claim quota
space again, we have to release the quota reservations made for that
previously delayed allocated clusters.
Move the position thats set the EXT4_GET_BLOCKS_DELALLOC_RESERVE to
where we actually do block allocation, it could simplify above handling
a lot, it means that we always set this magic once the allocation range
covers delalloc blocks, no need to take care of the allocation path.
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/ext4/inode.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 01a30483e521..1f6de35e6216 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -489,6 +489,14 @@ static int ext4_map_create_blocks(handle_t *handle, struct inode *inode,
unsigned int status;
int err, retval = 0;
+ /*
+ * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE
+ * indicates that the blocks and quotas has already been
+ * checked when the data was copied into the page cache.
+ */
+ if (map->m_flags & EXT4_MAP_DELAYED)
+ flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
+
/*
* Here we clear m_flags because after allocating an new extent,
* it will be set again.
@@ -2224,11 +2232,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
* writeback and there is nothing we can do about it so it might result
* in data loss. So use reserved blocks to allocate metadata if
* possible.
- *
- * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if
- * the blocks in question are delalloc blocks. This indicates
- * that the blocks and quotas has already been checked when
- * the data was copied into the page cache.
*/
get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
EXT4_GET_BLOCKS_METADATA_NOFAIL |
@@ -2236,8 +2239,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
dioread_nolock = ext4_should_dioread_nolock(inode);
if (dioread_nolock)
get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
- if (map->m_flags & BIT(BH_Delay))
- get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
if (err < 0)
--
2.31.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 03/10] ext4: don't set EXTENT_STATUS_DELAYED on allocated blocks
2024-06-01 3:41 [PATCH 00/10] ext4: simplify the counting and management of delalloc reserved blocks Zhang Yi
2024-06-01 3:41 ` [PATCH 01/10] ext4: factor out ext4_map_create_blocks() to allocate new blocks Zhang Yi
2024-06-01 3:41 ` [PATCH 02/10] ext4: optimize the EXT4_GET_BLOCKS_DELALLOC_RESERVE flag set Zhang Yi
@ 2024-06-01 3:41 ` Zhang Yi
2024-06-01 3:41 ` [PATCH 04/10] ext4: let __revise_pending() return newly inserted pendings Zhang Yi
` (6 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Zhang Yi @ 2024-06-01 3:41 UTC (permalink / raw)
To: linux-ext4
Cc: linux-fsdevel, linux-kernel, tytso, adilger.kernel, jack,
ritesh.list, yi.zhang, yi.zhang, chengzhihao1, yukuai3
From: Zhang Yi <yi.zhang@huawei.com>
Since we always set EXT4_GET_BLOCKS_DELALLOC_RESERVE when allocating
delalloc blocks, there is no need to keep delayed flag on the unwritten
extent status entry, so just drop it after allocation.
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/ext4/extents_status.c | 9 +--------
fs/ext4/inode.c | 11 -----------
2 files changed, 1 insertion(+), 19 deletions(-)
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 23caf1f028b0..084ea0a753ee 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -867,14 +867,7 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
return;
BUG_ON(end < lblk);
-
- if ((status & EXTENT_STATUS_DELAYED) &&
- (status & EXTENT_STATUS_WRITTEN)) {
- ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as "
- " delayed and written which can potentially "
- " cause data loss.", lblk, len);
- WARN_ON(1);
- }
+ WARN_ON_ONCE(status & EXTENT_STATUS_DELAYED);
newes.es_lblk = lblk;
newes.es_len = len;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1f6de35e6216..0dde2bf078ba 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -558,12 +558,6 @@ static int ext4_map_create_blocks(handle_t *handle, struct inode *inode,
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
- if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
- !(status & EXTENT_STATUS_WRITTEN) &&
- ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
- map->m_lblk + map->m_len - 1))
- status |= EXTENT_STATUS_DELAYED;
-
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
map->m_pblk, status);
@@ -682,11 +676,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
- if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
- !(status & EXTENT_STATUS_WRITTEN) &&
- ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
- map->m_lblk + map->m_len - 1))
- status |= EXTENT_STATUS_DELAYED;
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
map->m_pblk, status);
}
--
2.31.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 04/10] ext4: let __revise_pending() return newly inserted pendings
2024-06-01 3:41 [PATCH 00/10] ext4: simplify the counting and management of delalloc reserved blocks Zhang Yi
` (2 preceding siblings ...)
2024-06-01 3:41 ` [PATCH 03/10] ext4: don't set EXTENT_STATUS_DELAYED on allocated blocks Zhang Yi
@ 2024-06-01 3:41 ` Zhang Yi
2024-06-01 3:41 ` [PATCH 05/10] ext4: count removed reserved blocks for delalloc only extent entry Zhang Yi
` (5 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Zhang Yi @ 2024-06-01 3:41 UTC (permalink / raw)
To: linux-ext4
Cc: linux-fsdevel, linux-kernel, tytso, adilger.kernel, jack,
ritesh.list, yi.zhang, yi.zhang, chengzhihao1, yukuai3
From: Zhang Yi <yi.zhang@huawei.com>
Let __insert_pending() return 1 after successfully inserting a new
pending cluster, and also let __revise_pending() to return the number of
of newly inserted pendings.
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/ext4/extents_status.c | 28 ++++++++++++++++++----------
1 file changed, 18 insertions(+), 10 deletions(-)
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 084ea0a753ee..e54e15eb05b9 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -885,7 +885,7 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
es1 = __es_alloc_extent(true);
if ((err1 || err2) && !es2)
es2 = __es_alloc_extent(true);
- if ((err1 || err2 || err3) && revise_pending && !pr)
+ if ((err1 || err2 || err3 < 0) && revise_pending && !pr)
pr = __alloc_pending(true);
write_lock(&EXT4_I(inode)->i_es_lock);
@@ -913,7 +913,7 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
if (revise_pending) {
err3 = __revise_pending(inode, lblk, len, &pr);
- if (err3 != 0)
+ if (err3 < 0)
goto error;
if (pr) {
__free_pending(pr);
@@ -922,7 +922,7 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
}
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
- if (err1 || err2 || err3)
+ if (err1 || err2 || err3 < 0)
goto retry;
ext4_es_print_tree(inode);
@@ -1931,7 +1931,7 @@ static struct pending_reservation *__get_pending(struct inode *inode,
* @lblk - logical block in the cluster to be added
* @prealloc - preallocated pending entry
*
- * Returns 0 on successful insertion and -ENOMEM on failure. If the
+ * Returns 1 on successful insertion and -ENOMEM on failure. If the
* pending reservation is already in the set, returns successfully.
*/
static int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
@@ -1975,6 +1975,7 @@ static int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
rb_link_node(&pr->rb_node, parent, p);
rb_insert_color(&pr->rb_node, &tree->root);
+ ret = 1;
out:
return ret;
@@ -2096,7 +2097,7 @@ void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
es1 = __es_alloc_extent(true);
if ((err1 || err2) && !es2)
es2 = __es_alloc_extent(true);
- if (err1 || err2 || err3) {
+ if (err1 || err2 || err3 < 0) {
if (lclu_allocated && !pr1)
pr1 = __alloc_pending(true);
if (end_allocated && !pr2)
@@ -2126,7 +2127,7 @@ void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
if (lclu_allocated) {
err3 = __insert_pending(inode, lblk, &pr1);
- if (err3 != 0)
+ if (err3 < 0)
goto error;
if (pr1) {
__free_pending(pr1);
@@ -2135,7 +2136,7 @@ void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
}
if (end_allocated) {
err3 = __insert_pending(inode, end, &pr2);
- if (err3 != 0)
+ if (err3 < 0)
goto error;
if (pr2) {
__free_pending(pr2);
@@ -2144,7 +2145,7 @@ void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
}
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
- if (err1 || err2 || err3)
+ if (err1 || err2 || err3 < 0)
goto retry;
ext4_es_print_tree(inode);
@@ -2254,7 +2255,9 @@ unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
*
* Used after a newly allocated extent is added to the extents status tree.
* Requires that the extents in the range have either written or unwritten
- * status. Must be called while holding i_es_lock.
+ * status. Must be called while holding i_es_lock. Returns number of new
+ * inserts pending cluster on insert pendings, returns 0 on remove pendings,
+ * return -ENOMEM on failure.
*/
static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len,
@@ -2264,6 +2267,7 @@ static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t end = lblk + len - 1;
ext4_lblk_t first, last;
bool f_del = false, l_del = false;
+ int pendings = 0;
int ret = 0;
if (len == 0)
@@ -2291,6 +2295,7 @@ static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
ret = __insert_pending(inode, first, prealloc);
if (ret < 0)
goto out;
+ pendings += ret;
} else {
last = EXT4_LBLK_CMASK(sbi, end) +
sbi->s_cluster_ratio - 1;
@@ -2302,6 +2307,7 @@ static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
ret = __insert_pending(inode, last, prealloc);
if (ret < 0)
goto out;
+ pendings += ret;
} else
__remove_pending(inode, last);
}
@@ -2314,6 +2320,7 @@ static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
ret = __insert_pending(inode, first, prealloc);
if (ret < 0)
goto out;
+ pendings += ret;
} else
__remove_pending(inode, first);
@@ -2325,9 +2332,10 @@ static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
ret = __insert_pending(inode, last, prealloc);
if (ret < 0)
goto out;
+ pendings += ret;
} else
__remove_pending(inode, last);
}
out:
- return ret;
+ return (ret < 0) ? ret : pendings;
}
--
2.31.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 05/10] ext4: count removed reserved blocks for delalloc only extent entry
2024-06-01 3:41 [PATCH 00/10] ext4: simplify the counting and management of delalloc reserved blocks Zhang Yi
` (3 preceding siblings ...)
2024-06-01 3:41 ` [PATCH 04/10] ext4: let __revise_pending() return newly inserted pendings Zhang Yi
@ 2024-06-01 3:41 ` Zhang Yi
2024-06-01 3:41 ` [PATCH 06/10] ext4: update delalloc data reserve spcae in ext4_es_insert_extent() Zhang Yi
` (4 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Zhang Yi @ 2024-06-01 3:41 UTC (permalink / raw)
To: linux-ext4
Cc: linux-fsdevel, linux-kernel, tytso, adilger.kernel, jack,
ritesh.list, yi.zhang, yi.zhang, chengzhihao1, yukuai3
From: Zhang Yi <yi.zhang@huawei.com>
If bigalloc feature is enabled, __es_remove_extent() only counts
reserved clusters when removing delalloc extent entry, it doesn't count
reserved blocks. However, it's useful to distinguish whether we are
allocating blocks that contains a delalloc range in one cluster, so
let's count the reserved blocks number too.
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/ext4/extents_status.c | 64 +++++++++++++++++++++++++---------------
1 file changed, 40 insertions(+), 24 deletions(-)
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index e54e15eb05b9..ced9826682bb 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -141,13 +141,18 @@
* -- Extent-level locking
*/
+struct rsvd_info {
+ int delonly_cluster; /* reserved clusters for delalloc es entry */
+ int delonly_block; /* reserved blocks for delalloc es entry */
+};
+
static struct kmem_cache *ext4_es_cachep;
static struct kmem_cache *ext4_pending_cachep;
static int __es_insert_extent(struct inode *inode, struct extent_status *newes,
struct extent_status *prealloc);
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t end, int *reserved,
+ ext4_lblk_t end, struct rsvd_info *rinfo,
struct extent_status *prealloc);
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
@@ -1042,7 +1047,8 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
}
struct rsvd_count {
- int ndelonly;
+ int ndelonly_cluster;
+ int ndelonly_block;
bool first_do_lblk_found;
ext4_lblk_t first_do_lblk;
ext4_lblk_t last_do_lblk;
@@ -1068,7 +1074,8 @@ static void init_rsvd(struct inode *inode, ext4_lblk_t lblk,
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct rb_node *node;
- rc->ndelonly = 0;
+ rc->ndelonly_cluster = 0;
+ rc->ndelonly_block = 0;
/*
* for bigalloc, note the first delonly block in the range has not
@@ -1116,11 +1123,13 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
WARN_ON(len <= 0);
if (sbi->s_cluster_ratio == 1) {
- rc->ndelonly += (int) len;
+ rc->ndelonly_cluster += (int) len;
+ rc->ndelonly_block = rc->ndelonly_cluster;
return;
}
/* bigalloc */
+ rc->ndelonly_block += (int)len;
i = (lblk < es->es_lblk) ? es->es_lblk : lblk;
end = lblk + (ext4_lblk_t) len - 1;
@@ -1140,7 +1149,7 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
* doesn't start with it, count it and stop tracking
*/
if (rc->partial && (rc->lclu != EXT4_B2C(sbi, i))) {
- rc->ndelonly++;
+ rc->ndelonly_cluster++;
rc->partial = false;
}
@@ -1150,7 +1159,7 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
*/
if (EXT4_LBLK_COFF(sbi, i) != 0) {
if (end >= EXT4_LBLK_CFILL(sbi, i)) {
- rc->ndelonly++;
+ rc->ndelonly_cluster++;
rc->partial = false;
i = EXT4_LBLK_CFILL(sbi, i) + 1;
}
@@ -1162,7 +1171,7 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
*/
if ((i + sbi->s_cluster_ratio - 1) <= end) {
nclu = (end - i + 1) >> sbi->s_cluster_bits;
- rc->ndelonly += nclu;
+ rc->ndelonly_cluster += nclu;
i += nclu << sbi->s_cluster_bits;
}
@@ -1242,9 +1251,9 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
if (sbi->s_cluster_ratio > 1) {
/* count any remaining partial cluster */
if (rc->partial)
- rc->ndelonly++;
+ rc->ndelonly_cluster++;
- if (rc->ndelonly == 0)
+ if (rc->ndelonly_cluster == 0)
return 0;
first_lclu = EXT4_B2C(sbi, rc->first_do_lblk);
@@ -1261,7 +1270,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
while (es && ext4_es_end(es) >=
EXT4_LBLK_CMASK(sbi, rc->first_do_lblk)) {
if (ext4_es_is_delonly(es)) {
- rc->ndelonly--;
+ rc->ndelonly_cluster--;
left_delonly = true;
break;
}
@@ -1281,7 +1290,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
while (es && es->es_lblk <=
EXT4_LBLK_CFILL(sbi, rc->last_do_lblk)) {
if (ext4_es_is_delonly(es)) {
- rc->ndelonly--;
+ rc->ndelonly_cluster--;
right_delonly = true;
break;
}
@@ -1327,7 +1336,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
if (count_pending) {
pr = __pr_tree_search(&tree->root, first_lclu);
while (pr && pr->lclu <= last_lclu) {
- rc->ndelonly--;
+ rc->ndelonly_cluster--;
node = rb_next(&pr->rb_node);
rb_erase(&pr->rb_node, &tree->root);
__free_pending(pr);
@@ -1338,7 +1347,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
}
}
}
- return rc->ndelonly;
+ return rc->ndelonly_cluster;
}
@@ -1348,16 +1357,17 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
* @inode - file containing range
* @lblk - first block in range
* @end - last block in range
- * @reserved - number of cluster reservations released
+ * @rinfo - reserved information collected, includes number of
+ * block/cluster reservations released
* @prealloc - pre-allocated es to avoid memory allocation failures
*
- * If @reserved is not NULL and delayed allocation is enabled, counts
+ * If @rinfo is not NULL and delayed allocation is enabled, counts
* block/cluster reservations freed by removing range and if bigalloc
* enabled cancels pending reservations as needed. Returns 0 on success,
* error code on failure.
*/
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t end, int *reserved,
+ ext4_lblk_t end, struct rsvd_info *rinfo,
struct extent_status *prealloc)
{
struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
@@ -1367,11 +1377,15 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len1, len2;
ext4_fsblk_t block;
int err = 0;
- bool count_reserved = true;
+ bool count_reserved = false;
struct rsvd_count rc;
- if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC))
- count_reserved = false;
+ if (rinfo) {
+ rinfo->delonly_cluster = 0;
+ rinfo->delonly_block = 0;
+ if (test_opt(inode->i_sb, DELALLOC))
+ count_reserved = true;
+ }
es = __es_tree_search(&tree->root, lblk);
if (!es)
@@ -1469,8 +1483,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
}
out_get_reserved:
- if (count_reserved)
- *reserved = get_rsvd(inode, end, es, &rc);
+ if (count_reserved) {
+ rinfo->delonly_cluster = get_rsvd(inode, end, es, &rc);
+ rinfo->delonly_block = rc.ndelonly_block;
+ }
out:
return err;
}
@@ -1489,8 +1505,8 @@ void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len)
{
ext4_lblk_t end;
+ struct rsvd_info rinfo;
int err = 0;
- int reserved = 0;
struct extent_status *es = NULL;
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
@@ -1515,7 +1531,7 @@ void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
* is reclaimed.
*/
write_lock(&EXT4_I(inode)->i_es_lock);
- err = __es_remove_extent(inode, lblk, end, &reserved, es);
+ err = __es_remove_extent(inode, lblk, end, &rinfo, es);
/* Free preallocated extent if it didn't get used. */
if (es) {
if (!es->es_len)
@@ -1527,7 +1543,7 @@ void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
goto retry;
ext4_es_print_tree(inode);
- ext4_da_release_space(inode, reserved);
+ ext4_da_release_space(inode, rinfo.delonly_cluster);
return;
}
--
2.31.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 06/10] ext4: update delalloc data reserve spcae in ext4_es_insert_extent()
2024-06-01 3:41 [PATCH 00/10] ext4: simplify the counting and management of delalloc reserved blocks Zhang Yi
` (4 preceding siblings ...)
2024-06-01 3:41 ` [PATCH 05/10] ext4: count removed reserved blocks for delalloc only extent entry Zhang Yi
@ 2024-06-01 3:41 ` Zhang Yi
2024-06-01 3:41 ` [PATCH 07/10] ext4: drop ext4_es_delayed_clu() Zhang Yi
` (3 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Zhang Yi @ 2024-06-01 3:41 UTC (permalink / raw)
To: linux-ext4
Cc: linux-fsdevel, linux-kernel, tytso, adilger.kernel, jack,
ritesh.list, yi.zhang, yi.zhang, chengzhihao1, yukuai3
From: Zhang Yi <yi.zhang@huawei.com>
Now that we update data reserved space for delalloc after allocating
new blocks in ext4_{ind|ext}_map_blocks(), and if bigalloc feature is
enabled, we also need to query the extents_status tree to calculate the
exact reserved clusters. This is complicated now and it appears that
it's better to do this job in ext4_es_insert_extent(), because
__es_remove_extent() have already count delalloc blocks when removing
delalloc extents and __revise_pending() return new adding pending count,
we could update the reserved blocks easily in ext4_es_insert_extent().
Thers is one special case needs to concern is the quota claiming, when
bigalloc is enabled, if the delayed cluster allocation has been raced
by another no-delayed allocation(e.g. from fallocate) which doesn't
cover the delayed blocks:
|< one cluster >|
hhhhhhhhhhhhhhhhhhhdddddddddd
^ ^
|< >| < fallocate this range, don't claim quota again
We can't claim quota as usual because the fallocate has already claimed
it in ext4_mb_new_blocks(), we could notice this case through the
removed delalloc blocks count.
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/ext4/extents.c | 37 -------------------------------------
fs/ext4/extents_status.c | 22 +++++++++++++++++++++-
fs/ext4/indirect.c | 7 -------
3 files changed, 21 insertions(+), 45 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e57054bdc5fd..8bc8a519f745 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4355,43 +4355,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
goto out;
}
- /*
- * Reduce the reserved cluster count to reflect successful deferred
- * allocation of delayed allocated clusters or direct allocation of
- * clusters discovered to be delayed allocated. Once allocated, a
- * cluster is not included in the reserved count.
- */
- if (test_opt(inode->i_sb, DELALLOC) && allocated_clusters) {
- if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
- /*
- * When allocating delayed allocated clusters, simply
- * reduce the reserved cluster count and claim quota
- */
- ext4_da_update_reserve_space(inode, allocated_clusters,
- 1);
- } else {
- ext4_lblk_t lblk, len;
- unsigned int n;
-
- /*
- * When allocating non-delayed allocated clusters
- * (from fallocate, filemap, DIO, or clusters
- * allocated when delalloc has been disabled by
- * ext4_nonda_switch), reduce the reserved cluster
- * count by the number of allocated clusters that
- * have previously been delayed allocated. Quota
- * has been claimed by ext4_mb_new_blocks() above,
- * so release the quota reservations made for any
- * previously delayed allocated clusters.
- */
- lblk = EXT4_LBLK_CMASK(sbi, map->m_lblk);
- len = allocated_clusters << sbi->s_cluster_bits;
- n = ext4_es_delayed_clu(inode, lblk, len);
- if (n > 0)
- ext4_da_update_reserve_space(inode, (int) n, 0);
- }
- }
-
/*
* Cache the extent and update transaction to commit on fdatasync only
* when it is _not_ an unwritten extent.
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index ced9826682bb..841f2f6d536d 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -856,6 +856,8 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status newes;
ext4_lblk_t end = lblk + len - 1;
int err1 = 0, err2 = 0, err3 = 0;
+ struct rsvd_info rinfo;
+ int resv_used, pending = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct extent_status *es1 = NULL;
struct extent_status *es2 = NULL;
@@ -894,7 +896,7 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
pr = __alloc_pending(true);
write_lock(&EXT4_I(inode)->i_es_lock);
- err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
+ err1 = __es_remove_extent(inode, lblk, end, &rinfo, es1);
if (err1 != 0)
goto error;
/* Free preallocated extent if it didn't get used. */
@@ -924,9 +926,27 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
__free_pending(pr);
pr = NULL;
}
+ pending = err3;
}
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
+ /*
+ * Reduce the reserved cluster count to reflect successful deferred
+ * allocation of delayed allocated clusters or direct allocation of
+ * clusters discovered to be delayed allocated. Once allocated, a
+ * cluster is not included in the reserved count.
+ *
+ * When bigalloc is enabled, allocating non-delayed allocated blocks
+ * which belong to delayed allocated clusters (from fallocate, filemap,
+ * DIO, or clusters allocated when delalloc has been disabled by
+ * ext4_nonda_switch()). Quota has been claimed by ext4_mb_new_blocks(),
+ * so release the quota reservations made for any previously delayed
+ * allocated clusters.
+ */
+ resv_used = rinfo.delonly_cluster + pending;
+ if (resv_used)
+ ext4_da_update_reserve_space(inode, resv_used,
+ rinfo.delonly_block);
if (err1 || err2 || err3 < 0)
goto retry;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index d8ca7f64f952..7404f0935c90 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -652,13 +652,6 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
ext4_update_inode_fsync_trans(handle, inode, 1);
count = ar.len;
- /*
- * Update reserved blocks/metadata blocks after successful block
- * allocation which had been deferred till now.
- */
- if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
- ext4_da_update_reserve_space(inode, count, 1);
-
got_it:
map->m_flags |= EXT4_MAP_MAPPED;
map->m_pblk = le32_to_cpu(chain[depth-1].key);
--
2.31.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 07/10] ext4: drop ext4_es_delayed_clu()
2024-06-01 3:41 [PATCH 00/10] ext4: simplify the counting and management of delalloc reserved blocks Zhang Yi
` (5 preceding siblings ...)
2024-06-01 3:41 ` [PATCH 06/10] ext4: update delalloc data reserve spcae in ext4_es_insert_extent() Zhang Yi
@ 2024-06-01 3:41 ` Zhang Yi
2024-06-01 3:41 ` [PATCH 08/10] ext4: use ext4_map_query_blocks() in ext4_map_blocks() Zhang Yi
` (2 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Zhang Yi @ 2024-06-01 3:41 UTC (permalink / raw)
To: linux-ext4
Cc: linux-fsdevel, linux-kernel, tytso, adilger.kernel, jack,
ritesh.list, yi.zhang, yi.zhang, chengzhihao1, yukuai3
From: Zhang Yi <yi.zhang@huawei.com>
Since we move ext4_da_update_reserve_space() to ext4_es_insert_extent(),
no one uses ext4_es_delayed_clu() and __es_delayed_clu(), just drop
them.
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/ext4/extents_status.c | 88 ----------------------------------------
fs/ext4/extents_status.h | 2 -
2 files changed, 90 deletions(-)
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 841f2f6d536d..f28435b2618f 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -2189,94 +2189,6 @@ void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
return;
}
-/*
- * __es_delayed_clu - count number of clusters containing blocks that
- * are delayed only
- *
- * @inode - file containing block range
- * @start - logical block defining start of range
- * @end - logical block defining end of range
- *
- * Returns the number of clusters containing only delayed (not delayed
- * and unwritten) blocks in the range specified by @start and @end. Any
- * cluster or part of a cluster within the range and containing a delayed
- * and not unwritten block within the range is counted as a whole cluster.
- */
-static unsigned int __es_delayed_clu(struct inode *inode, ext4_lblk_t start,
- ext4_lblk_t end)
-{
- struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
- struct extent_status *es;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct rb_node *node;
- ext4_lblk_t first_lclu, last_lclu;
- unsigned long long last_counted_lclu;
- unsigned int n = 0;
-
- /* guaranteed to be unequal to any ext4_lblk_t value */
- last_counted_lclu = ~0ULL;
-
- es = __es_tree_search(&tree->root, start);
-
- while (es && (es->es_lblk <= end)) {
- if (ext4_es_is_delonly(es)) {
- if (es->es_lblk <= start)
- first_lclu = EXT4_B2C(sbi, start);
- else
- first_lclu = EXT4_B2C(sbi, es->es_lblk);
-
- if (ext4_es_end(es) >= end)
- last_lclu = EXT4_B2C(sbi, end);
- else
- last_lclu = EXT4_B2C(sbi, ext4_es_end(es));
-
- if (first_lclu == last_counted_lclu)
- n += last_lclu - first_lclu;
- else
- n += last_lclu - first_lclu + 1;
- last_counted_lclu = last_lclu;
- }
- node = rb_next(&es->rb_node);
- if (!node)
- break;
- es = rb_entry(node, struct extent_status, rb_node);
- }
-
- return n;
-}
-
-/*
- * ext4_es_delayed_clu - count number of clusters containing blocks that
- * are both delayed and unwritten
- *
- * @inode - file containing block range
- * @lblk - logical block defining start of range
- * @len - number of blocks in range
- *
- * Locking for external use of __es_delayed_clu().
- */
-unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t len)
-{
- struct ext4_inode_info *ei = EXT4_I(inode);
- ext4_lblk_t end;
- unsigned int n;
-
- if (len == 0)
- return 0;
-
- end = lblk + len - 1;
- WARN_ON(end < lblk);
-
- read_lock(&ei->i_es_lock);
-
- n = __es_delayed_clu(inode, lblk, end);
-
- read_unlock(&ei->i_es_lock);
-
- return n;
-}
-
/*
* __revise_pending - makes, cancels, or leaves unchanged pending cluster
* reservations for a specified block range depending
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 3c8e2edee5d5..5b49cb3b9aff 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -252,8 +252,6 @@ extern bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk);
extern void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, bool lclu_allocated,
bool end_allocated);
-extern unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t len);
extern void ext4_clear_inode_es(struct inode *inode);
#endif /* _EXT4_EXTENTS_STATUS_H */
--
2.31.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 08/10] ext4: use ext4_map_query_blocks() in ext4_map_blocks()
2024-06-01 3:41 [PATCH 00/10] ext4: simplify the counting and management of delalloc reserved blocks Zhang Yi
` (6 preceding siblings ...)
2024-06-01 3:41 ` [PATCH 07/10] ext4: drop ext4_es_delayed_clu() Zhang Yi
@ 2024-06-01 3:41 ` Zhang Yi
2024-06-01 3:41 ` [PATCH 09/10] ext4: drop ext4_es_is_delonly() Zhang Yi
2024-06-01 3:41 ` [PATCH 10/10] ext4: drop all delonly descriptions Zhang Yi
9 siblings, 0 replies; 11+ messages in thread
From: Zhang Yi @ 2024-06-01 3:41 UTC (permalink / raw)
To: linux-ext4
Cc: linux-fsdevel, linux-kernel, tytso, adilger.kernel, jack,
ritesh.list, yi.zhang, yi.zhang, chengzhihao1, yukuai3
From: Zhang Yi <yi.zhang@huawei.com>
The blocks map querying logic in ext4_map_blocks() are the same as
ext4_map_query_blocks(), so switch to directly use it.
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/ext4/inode.c | 22 +---------------------
1 file changed, 1 insertion(+), 21 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0dde2bf078ba..46e151f26655 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -658,27 +658,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
* file system block.
*/
down_read(&EXT4_I(inode)->i_data_sem);
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
- retval = ext4_ext_map_blocks(handle, inode, map, 0);
- } else {
- retval = ext4_ind_map_blocks(handle, inode, map, 0);
- }
- if (retval > 0) {
- unsigned int status;
-
- if (unlikely(retval != map->m_len)) {
- ext4_warning(inode->i_sb,
- "ES len assertion failed for inode "
- "%lu: retval %d != map->m_len %d",
- inode->i_ino, retval, map->m_len);
- WARN_ON(1);
- }
-
- status = map->m_flags & EXT4_MAP_UNWRITTEN ?
- EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
- ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
- map->m_pblk, status);
- }
+ retval = ext4_map_query_blocks(handle, inode, map);
up_read((&EXT4_I(inode)->i_data_sem));
found:
--
2.31.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 09/10] ext4: drop ext4_es_is_delonly()
2024-06-01 3:41 [PATCH 00/10] ext4: simplify the counting and management of delalloc reserved blocks Zhang Yi
` (7 preceding siblings ...)
2024-06-01 3:41 ` [PATCH 08/10] ext4: use ext4_map_query_blocks() in ext4_map_blocks() Zhang Yi
@ 2024-06-01 3:41 ` Zhang Yi
2024-06-01 3:41 ` [PATCH 10/10] ext4: drop all delonly descriptions Zhang Yi
9 siblings, 0 replies; 11+ messages in thread
From: Zhang Yi @ 2024-06-01 3:41 UTC (permalink / raw)
To: linux-ext4
Cc: linux-fsdevel, linux-kernel, tytso, adilger.kernel, jack,
ritesh.list, yi.zhang, yi.zhang, chengzhihao1, yukuai3
From: Zhang Yi <yi.zhang@huawei.com>
Since we don't add delayed flag in unwritten extents, so there is no
difference between ext4_es_is_delayed() and ext4_es_is_delonly(),
just drop ext4_es_is_delonly().
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/ext4/extents_status.c | 18 +++++++++---------
fs/ext4/extents_status.h | 5 -----
fs/ext4/inode.c | 4 ++--
3 files changed, 11 insertions(+), 16 deletions(-)
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index f28435b2618f..db3fe3ada2e5 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -561,8 +561,8 @@ static int ext4_es_can_be_merged(struct extent_status *es1,
if (ext4_es_is_hole(es1))
return 1;
- /* we need to check delayed extent is without unwritten status */
- if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
+ /* we need to check delayed extent */
+ if (ext4_es_is_delayed(es1))
return 1;
return 0;
@@ -1137,7 +1137,7 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_lblk_t i, end, nclu;
- if (!ext4_es_is_delonly(es))
+ if (!ext4_es_is_delayed(es))
return;
WARN_ON(len <= 0);
@@ -1289,7 +1289,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
es = rc->left_es;
while (es && ext4_es_end(es) >=
EXT4_LBLK_CMASK(sbi, rc->first_do_lblk)) {
- if (ext4_es_is_delonly(es)) {
+ if (ext4_es_is_delayed(es)) {
rc->ndelonly_cluster--;
left_delonly = true;
break;
@@ -1309,7 +1309,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
}
while (es && es->es_lblk <=
EXT4_LBLK_CFILL(sbi, rc->last_do_lblk)) {
- if (ext4_es_is_delonly(es)) {
+ if (ext4_es_is_delayed(es)) {
rc->ndelonly_cluster--;
right_delonly = true;
break;
@@ -2237,7 +2237,7 @@ static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
if (EXT4_B2C(sbi, lblk) == EXT4_B2C(sbi, end)) {
first = EXT4_LBLK_CMASK(sbi, lblk);
if (first != lblk)
- f_del = __es_scan_range(inode, &ext4_es_is_delonly,
+ f_del = __es_scan_range(inode, &ext4_es_is_delayed,
first, lblk - 1);
if (f_del) {
ret = __insert_pending(inode, first, prealloc);
@@ -2249,7 +2249,7 @@ static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
sbi->s_cluster_ratio - 1;
if (last != end)
l_del = __es_scan_range(inode,
- &ext4_es_is_delonly,
+ &ext4_es_is_delayed,
end + 1, last);
if (l_del) {
ret = __insert_pending(inode, last, prealloc);
@@ -2262,7 +2262,7 @@ static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
} else {
first = EXT4_LBLK_CMASK(sbi, lblk);
if (first != lblk)
- f_del = __es_scan_range(inode, &ext4_es_is_delonly,
+ f_del = __es_scan_range(inode, &ext4_es_is_delayed,
first, lblk - 1);
if (f_del) {
ret = __insert_pending(inode, first, prealloc);
@@ -2274,7 +2274,7 @@ static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
if (last != end)
- l_del = __es_scan_range(inode, &ext4_es_is_delonly,
+ l_del = __es_scan_range(inode, &ext4_es_is_delayed,
end + 1, last);
if (l_del) {
ret = __insert_pending(inode, last, prealloc);
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 5b49cb3b9aff..e484c60e55e3 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -184,11 +184,6 @@ static inline int ext4_es_is_mapped(struct extent_status *es)
return (ext4_es_is_written(es) || ext4_es_is_unwritten(es));
}
-static inline int ext4_es_is_delonly(struct extent_status *es)
-{
- return (ext4_es_is_delayed(es) && !ext4_es_is_unwritten(es));
-}
-
static inline void ext4_es_set_referenced(struct extent_status *es)
{
es->es_pblk |= ((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 46e151f26655..f44f114a5c59 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1645,7 +1645,7 @@ static int ext4_clu_alloc_state(struct inode *inode, ext4_lblk_t lblk)
int ret;
/* Has delalloc reservation? */
- if (ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk))
+ if (ext4_es_scan_clu(inode, &ext4_es_is_delayed, lblk))
return 1;
/* Already been allocated? */
@@ -1766,7 +1766,7 @@ static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map)
* Delayed extent could be allocated by fallocate.
* So we need to check it.
*/
- if (ext4_es_is_delonly(&es)) {
+ if (ext4_es_is_delayed(&es)) {
map->m_flags |= EXT4_MAP_DELAYED;
return 0;
}
--
2.31.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 10/10] ext4: drop all delonly descriptions
2024-06-01 3:41 [PATCH 00/10] ext4: simplify the counting and management of delalloc reserved blocks Zhang Yi
` (8 preceding siblings ...)
2024-06-01 3:41 ` [PATCH 09/10] ext4: drop ext4_es_is_delonly() Zhang Yi
@ 2024-06-01 3:41 ` Zhang Yi
9 siblings, 0 replies; 11+ messages in thread
From: Zhang Yi @ 2024-06-01 3:41 UTC (permalink / raw)
To: linux-ext4
Cc: linux-fsdevel, linux-kernel, tytso, adilger.kernel, jack,
ritesh.list, yi.zhang, yi.zhang, chengzhihao1, yukuai3
From: Zhang Yi <yi.zhang@huawei.com>
Drop all delonly descriptions in parameters and comments.
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/ext4/extents_status.c | 92 ++++++++++++++++++++--------------------
1 file changed, 45 insertions(+), 47 deletions(-)
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index db3fe3ada2e5..0af14b7d5005 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -142,8 +142,8 @@
*/
struct rsvd_info {
- int delonly_cluster; /* reserved clusters for delalloc es entry */
- int delonly_block; /* reserved blocks for delalloc es entry */
+ int delayed_cluster; /* reserved clusters for delalloc es entry */
+ int delayed_block; /* reserved blocks for delalloc es entry */
};
static struct kmem_cache *ext4_es_cachep;
@@ -943,10 +943,10 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
* so release the quota reservations made for any previously delayed
* allocated clusters.
*/
- resv_used = rinfo.delonly_cluster + pending;
+ resv_used = rinfo.delayed_cluster + pending;
if (resv_used)
ext4_da_update_reserve_space(inode, resv_used,
- rinfo.delonly_block);
+ rinfo.delayed_block);
if (err1 || err2 || err3 < 0)
goto retry;
@@ -1067,8 +1067,8 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
}
struct rsvd_count {
- int ndelonly_cluster;
- int ndelonly_block;
+ int ndelayed_cluster;
+ int ndelayed_block;
bool first_do_lblk_found;
ext4_lblk_t first_do_lblk;
ext4_lblk_t last_do_lblk;
@@ -1094,11 +1094,11 @@ static void init_rsvd(struct inode *inode, ext4_lblk_t lblk,
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct rb_node *node;
- rc->ndelonly_cluster = 0;
- rc->ndelonly_block = 0;
+ rc->ndelayed_cluster = 0;
+ rc->ndelayed_block = 0;
/*
- * for bigalloc, note the first delonly block in the range has not
+ * for bigalloc, note the first delayed block in the range has not
* been found, record the extent containing the block to the left of
* the region to be removed, if any, and note that there's no partial
* cluster to track
@@ -1118,9 +1118,8 @@ static void init_rsvd(struct inode *inode, ext4_lblk_t lblk,
}
/*
- * count_rsvd - count the clusters containing delayed and not unwritten
- * (delonly) blocks in a range within an extent and add to
- * the running tally in rsvd_count
+ * count_rsvd - count the clusters containing delayed blocks in a range
+ * within an extent and add to the running tally in rsvd_count
*
* @inode - file containing extent
* @lblk - first block in range
@@ -1143,19 +1142,19 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
WARN_ON(len <= 0);
if (sbi->s_cluster_ratio == 1) {
- rc->ndelonly_cluster += (int) len;
- rc->ndelonly_block = rc->ndelonly_cluster;
+ rc->ndelayed_cluster += (int) len;
+ rc->ndelayed_block = rc->ndelayed_cluster;
return;
}
/* bigalloc */
- rc->ndelonly_block += (int)len;
+ rc->ndelayed_block += (int)len;
i = (lblk < es->es_lblk) ? es->es_lblk : lblk;
end = lblk + (ext4_lblk_t) len - 1;
end = (end > ext4_es_end(es)) ? ext4_es_end(es) : end;
- /* record the first block of the first delonly extent seen */
+ /* record the first block of the first delayed extent seen */
if (!rc->first_do_lblk_found) {
rc->first_do_lblk = i;
rc->first_do_lblk_found = true;
@@ -1169,7 +1168,7 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
* doesn't start with it, count it and stop tracking
*/
if (rc->partial && (rc->lclu != EXT4_B2C(sbi, i))) {
- rc->ndelonly_cluster++;
+ rc->ndelayed_cluster++;
rc->partial = false;
}
@@ -1179,7 +1178,7 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
*/
if (EXT4_LBLK_COFF(sbi, i) != 0) {
if (end >= EXT4_LBLK_CFILL(sbi, i)) {
- rc->ndelonly_cluster++;
+ rc->ndelayed_cluster++;
rc->partial = false;
i = EXT4_LBLK_CFILL(sbi, i) + 1;
}
@@ -1187,11 +1186,11 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
/*
* if the current cluster starts on a cluster boundary, count the
- * number of whole delonly clusters in the extent
+ * number of whole delayed clusters in the extent
*/
if ((i + sbi->s_cluster_ratio - 1) <= end) {
nclu = (end - i + 1) >> sbi->s_cluster_bits;
- rc->ndelonly_cluster += nclu;
+ rc->ndelayed_cluster += nclu;
i += nclu << sbi->s_cluster_bits;
}
@@ -1251,10 +1250,9 @@ static struct pending_reservation *__pr_tree_search(struct rb_root *root,
* @rc - pointer to reserved count data
*
* The number of reservations to be released is equal to the number of
- * clusters containing delayed and not unwritten (delonly) blocks within
- * the range, minus the number of clusters still containing delonly blocks
- * at the ends of the range, and minus the number of pending reservations
- * within the range.
+ * clusters containing delayed blocks within the range, minus the number of
+ * clusters still containing delayed blocks at the ends of the range, and
+ * minus the number of pending reservations within the range.
*/
static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
struct extent_status *right_es,
@@ -1265,33 +1263,33 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
struct rb_node *node;
ext4_lblk_t first_lclu, last_lclu;
- bool left_delonly, right_delonly, count_pending;
+ bool left_delayed, right_delayed, count_pending;
struct extent_status *es;
if (sbi->s_cluster_ratio > 1) {
/* count any remaining partial cluster */
if (rc->partial)
- rc->ndelonly_cluster++;
+ rc->ndelayed_cluster++;
- if (rc->ndelonly_cluster == 0)
+ if (rc->ndelayed_cluster == 0)
return 0;
first_lclu = EXT4_B2C(sbi, rc->first_do_lblk);
last_lclu = EXT4_B2C(sbi, rc->last_do_lblk);
/*
- * decrease the delonly count by the number of clusters at the
- * ends of the range that still contain delonly blocks -
+ * decrease the delayed count by the number of clusters at the
+ * ends of the range that still contain delayed blocks -
* these clusters still need to be reserved
*/
- left_delonly = right_delonly = false;
+ left_delayed = right_delayed = false;
es = rc->left_es;
while (es && ext4_es_end(es) >=
EXT4_LBLK_CMASK(sbi, rc->first_do_lblk)) {
if (ext4_es_is_delayed(es)) {
- rc->ndelonly_cluster--;
- left_delonly = true;
+ rc->ndelayed_cluster--;
+ left_delayed = true;
break;
}
node = rb_prev(&es->rb_node);
@@ -1299,7 +1297,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
break;
es = rb_entry(node, struct extent_status, rb_node);
}
- if (right_es && (!left_delonly || first_lclu != last_lclu)) {
+ if (right_es && (!left_delayed || first_lclu != last_lclu)) {
if (end < ext4_es_end(right_es)) {
es = right_es;
} else {
@@ -1310,8 +1308,8 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
while (es && es->es_lblk <=
EXT4_LBLK_CFILL(sbi, rc->last_do_lblk)) {
if (ext4_es_is_delayed(es)) {
- rc->ndelonly_cluster--;
- right_delonly = true;
+ rc->ndelayed_cluster--;
+ right_delayed = true;
break;
}
node = rb_next(&es->rb_node);
@@ -1325,21 +1323,21 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
/*
* Determine the block range that should be searched for
* pending reservations, if any. Clusters on the ends of the
- * original removed range containing delonly blocks are
+ * original removed range containing delayed blocks are
* excluded. They've already been accounted for and it's not
* possible to determine if an associated pending reservation
* should be released with the information available in the
* extents status tree.
*/
if (first_lclu == last_lclu) {
- if (left_delonly | right_delonly)
+ if (left_delayed | right_delayed)
count_pending = false;
else
count_pending = true;
} else {
- if (left_delonly)
+ if (left_delayed)
first_lclu++;
- if (right_delonly)
+ if (right_delayed)
last_lclu--;
if (first_lclu <= last_lclu)
count_pending = true;
@@ -1350,13 +1348,13 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
/*
* a pending reservation found between first_lclu and last_lclu
* represents an allocated cluster that contained at least one
- * delonly block, so the delonly total must be reduced by one
+ * delayed block, so the delayed total must be reduced by one
* for each pending reservation found and released
*/
if (count_pending) {
pr = __pr_tree_search(&tree->root, first_lclu);
while (pr && pr->lclu <= last_lclu) {
- rc->ndelonly_cluster--;
+ rc->ndelayed_cluster--;
node = rb_next(&pr->rb_node);
rb_erase(&pr->rb_node, &tree->root);
__free_pending(pr);
@@ -1367,7 +1365,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
}
}
}
- return rc->ndelonly_cluster;
+ return rc->ndelayed_cluster;
}
@@ -1401,8 +1399,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
struct rsvd_count rc;
if (rinfo) {
- rinfo->delonly_cluster = 0;
- rinfo->delonly_block = 0;
+ rinfo->delayed_cluster = 0;
+ rinfo->delayed_block = 0;
if (test_opt(inode->i_sb, DELALLOC))
count_reserved = true;
}
@@ -1504,8 +1502,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
out_get_reserved:
if (count_reserved) {
- rinfo->delonly_cluster = get_rsvd(inode, end, es, &rc);
- rinfo->delonly_block = rc.ndelonly_block;
+ rinfo->delayed_cluster = get_rsvd(inode, end, es, &rc);
+ rinfo->delayed_block = rc.ndelayed_block;
}
out:
return err;
@@ -1563,7 +1561,7 @@ void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
goto retry;
ext4_es_print_tree(inode);
- ext4_da_release_space(inode, rinfo.delonly_cluster);
+ ext4_da_release_space(inode, rinfo.delayed_cluster);
return;
}
--
2.31.1
^ permalink raw reply related [flat|nested] 11+ messages in thread