From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
To: linux-ext4@vger.kernel.org
Cc: tytso@mit.edu, Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Subject: [PATCH v6 18/20] ext4: disable certain features in replay path
Date: Wed, 8 Apr 2020 14:55:28 -0700 [thread overview]
Message-ID: <20200408215530.25649-18-harshads@google.com> (raw)
In-Reply-To: <20200408215530.25649-1-harshads@google.com>
From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
The fast commit replay path reuses the regular file system code paths
to replay committed changes. However, since replay runs before the
file system is fully initialized, and since we don't have to be
especially careful about performance during replay, we can and need
to disable certain file system features on that path. More
specifically, we disable most of the extent status tree operations,
parts of mballoc, and some places where we mark the file system with
errors.
Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
fs/ext4/balloc.c | 7 +++++-
fs/ext4/ext4_jbd2.c | 2 +-
fs/ext4/extents_status.c | 24 +++++++++++++++++++
fs/ext4/ialloc.c | 52 +++++++++++++++++++++++++++-------------
fs/ext4/inode.c | 14 +++++++----
fs/ext4/mballoc.c | 21 ++++++++++------
6 files changed, 90 insertions(+), 30 deletions(-)
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 25960bb4fe69..220d818030ef 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -368,7 +368,12 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
struct buffer_head *bh)
{
ext4_fsblk_t blk;
- struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
+ struct ext4_group_info *grp;
+
+ if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
+ return 0;
+
+ grp = ext4_get_group_info(sb, block_group);
if (buffer_verified(bh))
return 0;
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index bbba1b067fdf..b9b3833c8fdd 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -101,7 +101,7 @@ handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
return ERR_PTR(err);
journal = EXT4_SB(sb)->s_journal;
- if (!journal)
+ if (!journal || (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
return ext4_get_nojournal();
return jbd2__journal_start(journal, blocks, rsv_blocks, revoke_creds,
GFP_NOFS, type, line);
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index d996b44d2265..69c16ac7416e 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -311,6 +311,9 @@ void ext4_es_find_extent_range(struct inode *inode,
ext4_lblk_t lblk, ext4_lblk_t end,
struct extent_status *es)
{
+ if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ return;
+
trace_ext4_es_find_extent_range_enter(inode, lblk);
read_lock(&EXT4_I(inode)->i_es_lock);
@@ -361,6 +364,9 @@ bool ext4_es_scan_range(struct inode *inode,
{
bool ret;
+ if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ return false;
+
read_lock(&EXT4_I(inode)->i_es_lock);
ret = __es_scan_range(inode, matching_fn, lblk, end);
read_unlock(&EXT4_I(inode)->i_es_lock);
@@ -404,6 +410,9 @@ bool ext4_es_scan_clu(struct inode *inode,
{
bool ret;
+ if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ return false;
+
read_lock(&EXT4_I(inode)->i_es_lock);
ret = __es_scan_clu(inode, matching_fn, lblk);
read_unlock(&EXT4_I(inode)->i_es_lock);
@@ -812,6 +821,9 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
int err = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ return 0;
+
es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
lblk, len, pblk, status, inode->i_ino);
@@ -873,6 +885,9 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status newes;
ext4_lblk_t end = lblk + len - 1;
+ if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ return;
+
newes.es_lblk = lblk;
newes.es_len = len;
ext4_es_store_pblock_status(&newes, pblk, status);
@@ -908,6 +923,9 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
struct rb_node *node;
int found = 0;
+ if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ return 0;
+
trace_ext4_es_lookup_extent_enter(inode, lblk);
es_debug("lookup extent in block %u\n", lblk);
@@ -1419,6 +1437,9 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
int err = 0;
int reserved = 0;
+ if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ return 0;
+
trace_ext4_es_remove_extent(inode, lblk, len);
es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
lblk, len, inode->i_ino);
@@ -1969,6 +1990,9 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
struct extent_status newes;
int err = 0;
+ if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ return 0;
+
es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
lblk, inode->i_ino);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 7ecac922d8a4..3ccdf7834ab6 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -82,7 +82,12 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
struct buffer_head *bh)
{
ext4_fsblk_t blk;
- struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
+ struct ext4_group_info *grp;
+
+ if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
+ return 0;
+
+ grp = ext4_get_group_info(sb, block_group);
if (buffer_verified(bh))
return 0;
@@ -284,15 +289,17 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
/* Don't bother if the inode bitmap is corrupt. */
- grp = ext4_get_group_info(sb, block_group);
if (IS_ERR(bitmap_bh)) {
fatal = PTR_ERR(bitmap_bh);
bitmap_bh = NULL;
goto error_return;
}
- if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) {
- fatal = -EFSCORRUPTED;
- goto error_return;
+ if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
+ grp = ext4_get_group_info(sb, block_group);
+ if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) {
+ fatal = -EFSCORRUPTED;
+ goto error_return;
+ }
}
BUFFER_TRACE(bitmap_bh, "get_write_access");
@@ -884,7 +891,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
struct inode *ret;
ext4_group_t i;
ext4_group_t flex_group;
- struct ext4_group_info *grp;
+ struct ext4_group_info *grp = NULL;
int encrypt = 0;
/* Cannot create files in a deleted directory */
@@ -1022,15 +1029,21 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
if (ext4_free_inodes_count(sb, gdp) == 0)
goto next_group;
- grp = ext4_get_group_info(sb, group);
- /* Skip groups with already-known suspicious inode tables */
- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
- goto next_group;
+ if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
+ grp = ext4_get_group_info(sb, group);
+ /*
+ * Skip groups with already-known suspicious inode
+ * tables
+ */
+ if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
+ goto next_group;
+ }
brelse(inode_bitmap_bh);
inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
/* Skip groups with suspicious inode tables */
- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) ||
+ if (((!(sbi->s_mount_state & EXT4_FC_REPLAY))
+ && EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) ||
IS_ERR(inode_bitmap_bh)) {
inode_bitmap_bh = NULL;
goto next_group;
@@ -1049,7 +1062,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
goto next_group;
}
- if (!handle) {
+ if ((!(sbi->s_mount_state & EXT4_FC_REPLAY)) && !handle) {
BUG_ON(nblocks <= 0);
handle = __ext4_journal_start_sb(dir->i_sb, line_no,
handle_type, nblocks, 0,
@@ -1153,9 +1166,15 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
/* Update the relevant bg descriptor fields */
if (ext4_has_group_desc_csum(sb)) {
int free;
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-
- down_read(&grp->alloc_sem); /* protect vs itable lazyinit */
+ struct ext4_group_info *grp = NULL;
+
+ if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
+ grp = ext4_get_group_info(sb, group);
+ down_read(&grp->alloc_sem); /*
+ * protect vs itable
+ * lazyinit
+ */
+ }
ext4_lock_group(sb, group); /* while we modify the bg desc */
free = EXT4_INODES_PER_GROUP(sb) -
ext4_itable_unused_count(sb, gdp);
@@ -1171,7 +1190,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
if (ino > free)
ext4_itable_unused_set(sb, gdp,
(EXT4_INODES_PER_GROUP(sb) - ino));
- up_read(&grp->alloc_sem);
+ if (!(sbi->s_mount_state & EXT4_FC_REPLAY))
+ up_read(&grp->alloc_sem);
} else {
ext4_lock_group(sb, group);
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b5ca07497bbc..d6e5ffce5cf7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -508,7 +508,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
return -EFSCORRUPTED;
/* Lookup extent status tree firstly */
- if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
+ if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) &&
+ ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
map->m_pblk = ext4_es_pblock(&es) +
map->m_lblk - es.es_lblk;
@@ -820,7 +821,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
int create = map_flags & EXT4_GET_BLOCKS_CREATE;
int err;
- J_ASSERT(handle != NULL || create == 0);
+ J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ || handle != NULL || create == 0);
map.m_lblk = block;
map.m_len = 1;
@@ -836,7 +838,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
return ERR_PTR(-ENOMEM);
if (map.m_flags & EXT4_MAP_NEW) {
J_ASSERT(create != 0);
- J_ASSERT(handle != NULL);
+ J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ || (handle != NULL));
/*
* Now that we do not always journal data, we should
@@ -4631,8 +4634,9 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
if (!ext4_inode_csum_verify(inode, raw_inode, ei) ||
ext4_simulate_fail(sb, EXT4_SIM_INODE_CRC)) {
- ext4_error_inode_err(inode, function, line, 0, EFSBADCRC,
- "iget: checksum invalid");
+ if (!(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
+ ext4_error_inode_err(inode, function, line, 0,
+ EFSBADCRC, "iget: checksum invalid");
ret = -EFSBADCRC;
goto bad_inode;
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index db08208c1137..d6cdcc3e125c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1449,14 +1449,16 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += EXT4_C2B(sbi, block);
- ext4_grp_locked_error(sb, e4b->bd_group,
- inode ? inode->i_ino : 0,
- blocknr,
- "freeing already freed block "
- "(bit %u); block bitmap corrupt.",
- block);
- ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+ if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
+ ext4_grp_locked_error(sb, e4b->bd_group,
+ inode ? inode->i_ino : 0,
+ blocknr,
+ "freeing already freed block (bit %u); block bitmap corrupt.",
+ block);
+ ext4_mark_group_bitmap_corrupted(
+ sb, e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
+ }
mb_regenerate_buddy(e4b);
goto done;
}
@@ -4116,6 +4118,9 @@ void ext4_discard_preallocations(struct inode *inode)
return;
}
+ if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
+ return;
+
mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
trace_ext4_discard_preallocations(inode);
@@ -4591,6 +4596,8 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
sb = ar->inode->i_sb;
sbi = EXT4_SB(sb);
+ WARN_ON(sbi->s_mount_state & EXT4_FC_REPLAY);
+
trace_ext4_request_blocks(ar);
/* Allow to use superuser reservation for quota file */
--
2.26.0.110.g2183baf09c-goog
next prev parent reply other threads:[~2020-04-08 21:56 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-04-08 21:55 [PATCH v6 01/20] ext4: update docs for fast commit feature Harshad Shirwadkar
2020-04-08 21:55 ` [PATCH v6 02/20] ext4: add handling for extended mount options Harshad Shirwadkar
2020-04-09 19:47 ` Andreas Dilger
2020-04-08 21:55 ` [PATCH v6 03/20] ext4, jbd2: add fast commit initialization routines Harshad Shirwadkar
2020-04-10 12:12 ` Andreas Dilger
2020-04-10 17:51 ` harshad shirwadkar
2020-04-08 21:55 ` [PATCH v6 04/20] jbd2: add fast commit block tracker variables Harshad Shirwadkar
2020-04-08 21:55 ` [PATCH v6 05/20] jbd2: disable fast commits if journal is empty Harshad Shirwadkar
2020-04-08 21:55 ` [PATCH v6 06/20] jbd2: fast commit main commit path changes Harshad Shirwadkar
2020-04-08 21:55 ` [PATCH v6 07/20] ext4: add generic diff tracking routines and range tracking Harshad Shirwadkar
2020-04-08 21:55 ` [PATCH v6 08/20] ext4: add directory entry tracking routines Harshad Shirwadkar
2020-04-08 21:55 ` [PATCH v6 09/20] ext4: add inode tracking and ineligible marking routines Harshad Shirwadkar
2020-04-08 21:55 ` [PATCH v6 10/20] ext4: break ext4_unlink() and ext4_link() Harshad Shirwadkar
2020-04-08 21:55 ` [PATCH v6 11/20] ext4: add fast commit track points Harshad Shirwadkar
2020-04-08 21:55 ` [PATCH v6 12/20] ext4: add fast commit on-disk format structs Harshad Shirwadkar
2020-04-08 21:55 ` [PATCH v6 13/20] jbd2: add new APIs for commit path of fast commits Harshad Shirwadkar
2020-04-08 21:55 ` [PATCH v6 14/20] ext4: main commit routine for " Harshad Shirwadkar
2020-04-08 21:55 ` [PATCH v6 15/20] jbd2: add fast commit recovery path support Harshad Shirwadkar
2020-04-08 21:55 ` [PATCH v6 16/20] ext4: fast commit recovery path preparation Harshad Shirwadkar
2020-04-08 21:55 ` [PATCH v6 17/20] ext4: add idempotent helpers to manipulate bitmaps Harshad Shirwadkar
2020-04-08 21:55 ` Harshad Shirwadkar [this message]
2020-04-08 21:55 ` [PATCH v6 19/20] ext4: add fast commit replay path Harshad Shirwadkar
2020-04-08 21:55 ` [PATCH v6 20/20] ext4: add debug mount option to test fast commit replay Harshad Shirwadkar
2020-04-09 22:01 ` [PATCH v6 01/20] ext4: update docs for fast commit feature Andreas Dilger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200408215530.25649-18-harshads@google.com \
--to=harshadshirwadkar@gmail.com \
--cc=linux-ext4@vger.kernel.org \
--cc=tytso@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).