All of lore.kernel.org
 help / color / mirror / Atom feed
From: Baokun Li <libaokun@linux.alibaba.com>
To: linux-ext4@vger.kernel.org
Cc: linux-crypto@vger.kernel.org, ebiggers@kernel.org,
	ardb@kernel.org, tytso@mit.edu, adilger.kernel@dilger.ca,
	jack@suse.cz, yi.zhang@huawei.com, ojaswin@linux.ibm.com,
	ritesh.list@gmail.com, Baokun Li <libaokun@linux.alibaba.com>
Subject: [PATCH RFC 15/17] ext4: use fast incremental CRC update in __ext4_new_inode()
Date: Fri,  8 May 2026 20:15:37 +0800	[thread overview]
Message-ID: <20260508121539.4174601-16-libaokun@linux.alibaba.com> (raw)
In-Reply-To: <20260508121539.4174601-1-libaokun@linux.alibaba.com>

Merge the bitmap modification and the group descriptor update into a
single group lock acquisition in __ext4_new_inode(). Previously the
bitmap bit was set under one lock/unlock pair, and the group descriptor
(GDP) fields (UNINIT, itable_unused, free_inodes, dirs, csum) were
updated under a separate lock/unlock pair with a gap in between. Another
thread could modify the bitmap and update the checksum during that gap,
making an incremental CRC update incorrect.

Now the full sequence -- set bit, update free inodes, update used dirs
(for directories), clear UNINIT, update itable_unused, and compute the
checksums -- happens atomically within a single ext4_lock_group()
critical section. The alloc_sem is acquired before the group lock to
maintain correct locking order with itable lazyinit.

Use ext4_inode_bitmap_csum_set_fast() for the normal path where the
stored checksum is valid. When EXT4_BG_INODE_UNINIT is set, fall back
to ext4_inode_bitmap_csum_set() for a full recalculation to establish
a correct baseline (mkfs leaves the checksum as zero for UNINIT groups).

Signed-off-by: Baokun Li <libaokun@linux.alibaba.com>
---
 fs/ext4/ialloc.c | 129 +++++++++++++++++++++++------------------------
 1 file changed, 63 insertions(+), 66 deletions(-)

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 8b75b331b26e..9dd1cdb367ba 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1135,7 +1135,25 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
 			ext4_std_error(sb, err);
 			goto out;
 		}
+
+		BUFFER_TRACE(group_desc_bh, "get_write_access");
+		err = ext4_journal_get_write_access(handle, sb, group_desc_bh,
+						    EXT4_JTR_NONE);
+		if (err) {
+			ext4_std_error(sb, err);
+			goto out;
+		}
+
+		/* We may have to initialize the block bitmap if it isn't already */
+		err = ext4_might_init_block_bitmap(handle, sb, group, gdp);
+		if (err)
+			goto out;
+
+		if (ext4_has_group_desc_csum(sb) &&
+		    !(sbi->s_mount_state & EXT4_FC_REPLAY))
+			down_read(&grp->alloc_sem);
 		ext4_lock_group(sb, group);
+
 		ret2 = ext4_test_and_set_bit(bit, inode_bitmap_bh->b_data);
 		if (ret2) {
 			/* Someone already took the bit. Repeat the search
@@ -1147,9 +1165,54 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
 				ret2 = 0;
 			} else {
 				ret2 = 1; /* we didn't grab the inode */
+				goto unlock_group;
+			}
+		}
+
+		/* Update the relevant bg descriptor fields */
+		ext4_free_inodes_set(sb, gdp,
+				     ext4_free_inodes_count(sb, gdp) - 1);
+		if (S_ISDIR(mode)) {
+			ext4_used_dirs_set(sb, gdp,
+					   ext4_used_dirs_count(sb, gdp) + 1);
+			if (sbi->s_log_groups_per_flex) {
+				ext4_group_t f = ext4_flex_group(sbi, group);
+				atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups,
+								f)->used_dirs);
+			}
+		}
+
+		if (ext4_has_group_desc_csum(sb)) {
+			bool fast_crc = true;
+			int free = EXT4_INODES_PER_GROUP(sb) -
+					ext4_itable_unused_count(sb, gdp);
+
+			if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+				gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+				free = 0;
+				/* Incremental CRC needs a valid csum baseline */
+				fast_crc = false;
 			}
+			/*
+			 * Check the relative inode number against the
+			 * last used relative inode number in this group.
+			 * If it is greater we need to update the
+			 * bg_itable_unused count.
+			 */
+			if (bit >= free)
+				ext4_itable_unused_set(sb, gdp,
+					EXT4_INODES_PER_GROUP(sb) - bit - 1);
+			if (fast_crc)
+				ext4_inode_bitmap_csum_set_fast(sb, gdp, bit);
+			else
+				ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh);
+			ext4_group_desc_csum_set(sb, group, gdp);
 		}
+unlock_group:
 		ext4_unlock_group(sb, group);
+		if (ext4_has_group_desc_csum(sb) &&
+		    !(sbi->s_mount_state & EXT4_FC_REPLAY))
+			up_read(&grp->alloc_sem);
 		if (!ret2)
 			goto got; /* we grabbed the inode! */
 
@@ -1168,72 +1231,6 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
 		goto out;
 	}
 
-	BUFFER_TRACE(group_desc_bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, sb, group_desc_bh,
-					    EXT4_JTR_NONE);
-	if (err) {
-		ext4_std_error(sb, err);
-		goto out;
-	}
-
-	/* We may have to initialize the block bitmap if it isn't already */
-	err = ext4_might_init_block_bitmap(handle, sb, group, gdp);
-	if (err)
-		goto out;
-
-	/* Update the relevant bg descriptor fields */
-	if (ext4_has_group_desc_csum(sb)) {
-		int free;
-		struct ext4_group_info *grp = NULL;
-
-		if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
-			grp = ext4_get_group_info(sb, group);
-			if (!grp) {
-				err = -EFSCORRUPTED;
-				goto out;
-			}
-			down_read(&grp->alloc_sem); /*
-						     * protect vs itable
-						     * lazyinit
-						     */
-		}
-		ext4_lock_group(sb, group); /* while we modify the bg desc */
-		free = EXT4_INODES_PER_GROUP(sb) -
-			ext4_itable_unused_count(sb, gdp);
-		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
-			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
-			free = 0;
-		}
-		/*
-		 * Check the relative inode number against the last used
-		 * relative inode number in this group. if it is greater
-		 * we need to update the bg_itable_unused count
-		 */
-		if (bit >= free)
-			ext4_itable_unused_set(sb, gdp,
-					(EXT4_INODES_PER_GROUP(sb) - bit - 1));
-		if (!(sbi->s_mount_state & EXT4_FC_REPLAY))
-			up_read(&grp->alloc_sem);
-	} else {
-		ext4_lock_group(sb, group);
-	}
-
-	ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
-	if (S_ISDIR(mode)) {
-		ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
-		if (sbi->s_log_groups_per_flex) {
-			ext4_group_t f = ext4_flex_group(sbi, group);
-
-			atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups,
-							f)->used_dirs);
-		}
-	}
-	if (ext4_has_group_desc_csum(sb)) {
-		ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh);
-		ext4_group_desc_csum_set(sb, group, gdp);
-	}
-	ext4_unlock_group(sb, group);
-
 	BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
 	err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
 	if (err) {
-- 
2.43.7


  parent reply	other threads:[~2026-05-08 12:16 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-08 12:15 [PATCH RFC 00/17] ext4/lib-crc: LBS performance part 1 - incremental CRC32c for bitmap checksums Baokun Li
2026-05-08 12:15 ` [PATCH RFC 01/17] lib/crc: add crc32c_flip_range() for incremental CRC update Baokun Li
     [not found]   ` <20260508204019.9E5A5C2BCB0@smtp.kernel.org>
2026-05-10  9:44     ` Baokun Li
2026-05-14  3:52   ` Eric Biggers
2026-05-08 12:15 ` [PATCH RFC 02/17] lib/crc: crc_kunit: add kunit test for crc32c_flip_range() Baokun Li
2026-05-08 12:15 ` [PATCH RFC 03/17] lib/crc: crc_kunit: add benchmark " Baokun Li
     [not found]   ` <20260508205415.8B843C2BCB0@smtp.kernel.org>
2026-05-10 10:03     ` Baokun Li
2026-05-08 12:15 ` [PATCH RFC 04/17] ext4: fix incorrect block bitmap free clusters update on metadata overlap Baokun Li
     [not found]   ` <20260508211732.E50B4C2BCB0@smtp.kernel.org>
2026-05-11  6:17     ` Baokun Li
2026-05-08 12:15 ` [PATCH RFC 05/17] ext4: extract block bitmap checksum get and store helpers Baokun Li
2026-05-08 12:15 ` [PATCH RFC 06/17] ext4: add ext4_block_bitmap_csum_set_range() for incremental checksum update Baokun Li
     [not found]   ` <20260508214640.B3A74C2BCB0@smtp.kernel.org>
2026-05-11  8:09     ` Baokun Li
2026-05-11  8:31     ` Baokun Li
2026-05-08 12:15 ` [PATCH RFC 07/17] ext4: use fast incremental CRC update in ext4_mb_mark_context() Baokun Li
     [not found]   ` <20260508223130.20E7AC2BCB0@smtp.kernel.org>
2026-05-11  8:15     ` Baokun Li
2026-05-08 12:15 ` [PATCH RFC 08/17] ext4: extract inode bitmap checksum get and store helpers Baokun Li
2026-05-08 12:15 ` [PATCH RFC 09/17] ext4: add ext4_inode_bitmap_csum_set_fast() for incremental checksum update Baokun Li
     [not found]   ` <20260508225807.71D9FC2BCB0@smtp.kernel.org>
2026-05-11  8:35     ` Baokun Li
2026-05-08 12:15 ` [PATCH RFC 10/17] ext4: use fast incremental CRC update in ext4_free_inode() Baokun Li
2026-05-08 12:15 ` [PATCH RFC 11/17] ext4: fix missing bg_used_dirs_count update in fast commit replay Baokun Li
2026-05-08 12:15 ` [PATCH RFC 12/17] ext4: factor out ext4_might_init_block_bitmap() helper Baokun Li
2026-05-08 12:15 ` [PATCH RFC 13/17] ext4: use fast incremental CRC update in ext4_mark_inode_used() Baokun Li
2026-05-08 12:15 ` [PATCH RFC 14/17] ext4: rename ino to bit in __ext4_new_inode() Baokun Li
2026-05-08 12:15 ` Baokun Li [this message]
2026-05-08 12:15 ` [PATCH RFC 16/17] ext4: extract ext4_update_inode_group_desc() to reduce duplication Baokun Li
2026-05-08 12:15 ` [PATCH RFC 17/17] ext4: add ext4_get_flex_group() helper to simplify flex group lookups Baokun Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260508121539.4174601-16-libaokun@linux.alibaba.com \
    --to=libaokun@linux.alibaba.com \
    --cc=adilger.kernel@dilger.ca \
    --cc=ardb@kernel.org \
    --cc=ebiggers@kernel.org \
    --cc=jack@suse.cz \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-ext4@vger.kernel.org \
    --cc=ojaswin@linux.ibm.com \
    --cc=ritesh.list@gmail.com \
    --cc=tytso@mit.edu \
    --cc=yi.zhang@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.