From: Jaegeuk Kim <jaegeuk@kernel.org>
To: Chunhai Guo <guochunhai@vivo.com>
Cc: linux-f2fs-devel@lists.sourceforge.net, frank.li@vivo.com
Subject: Re: [f2fs-dev] [PATCH v7] fsck.f2fs: Detect and fix looped node chain efficiently
Date: Tue, 23 May 2023 18:29:16 -0700 [thread overview]
Message-ID: <ZG1obHA7vrFzXx6r@google.com> (raw)
In-Reply-To: <20230523104059.78315-1-guochunhai@vivo.com>
On 05/23, Chunhai Guo wrote:
> find_fsync_inode() detects a looped node chain by comparing the loop
> counter with the number of free blocks. However, it may take tens of
> seconds to quit when there are many free blocks. We can use Floyd's
> cycle detection algorithm to make the detection more efficient, and fix
> the issue by writing NULL_ADDR as the next block address of the last
> node in the chain.
>
> Below is the log we encountered on a 256GB UFS storage device, where it
> took about 25 seconds to detect the looped node chain. After changing
> the algorithm, it takes about 20ms to finish the same job.
>
> [ 10.822904] fsck.f2fs: Info: version timestamp cur: 17, prev: 430
> [ 10.822949] fsck.f2fs: [update_superblock: 762] Info: Done to
> update superblock
> [ 10.822953] fsck.f2fs: Info: superblock features = 1499 :
> encrypt verity extra_attr project_quota quota_ino casefold
> [ 10.822956] fsck.f2fs: Info: superblock encrypt level = 0, salt =
> 00000000000000000000000000000000
> [ 10.822960] fsck.f2fs: Info: total FS sectors = 59249811 (231444
> MB)
> [ 35.852827] fsck.f2fs: detect looped node chain,
> blkaddr:1114802, next:1114803
> [ 35.852842] fsck.f2fs: [f2fs_do_mount:3846] record_fsync_data
> failed
> [ 35.856106] fsck.f2fs: fsck.f2fs terminated by exit(255)
>
> Signed-off-by: Chunhai Guo <guochunhai@vivo.com>
> ---
> v6 -> v7 : Correct a logic error in handling the is_detecting value
> returned by find_node_blk_fast().
> v5 -> v6 : Simplify the code by removing unnecessary retry logic.
> v4 -> v5 : Use IS_INODE() to make the code more clear.
> v3 -> v4 : Set c.bug_on with ASSERT_MSG() when issue is detected and fix
> it only if c.fix_on is 1.
> v2 -> v3 : Write inode with write_inode() to avoid chksum being broken.
> v1 -> v2 : Fix looped node chain directly after it is detected.
> ---
> fsck/mount.c | 132 ++++++++++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 114 insertions(+), 18 deletions(-)
>
> diff --git a/fsck/mount.c b/fsck/mount.c
> index 4c7488840c7c..d7fa81f61cb7 100644
> --- a/fsck/mount.c
> +++ b/fsck/mount.c
> @@ -3518,22 +3518,90 @@ static void destroy_fsync_dnodes(struct list_head *head)
> del_fsync_inode(entry);
> }
>
> +static int find_node_blk_fast(struct f2fs_sb_info *sbi, block_t *blkaddr_fast,
> + struct f2fs_node *node_blk_fast, bool *is_detecting)
> +{
> + int i, err;
> +
> + for (i = 0; i < 2; i++) {
> + if (!f2fs_is_valid_blkaddr(sbi, *blkaddr_fast, META_POR)) {
> + *is_detecting = false;
> + return 0;
> + }
> +
> + err = dev_read_block(node_blk_fast, *blkaddr_fast);
> + if (err)
> + return err;
> +
> + if (!is_recoverable_dnode(sbi, node_blk_fast)) {
> + *is_detecting = false;
> + return 0;
> + }
> +
> + *blkaddr_fast = next_blkaddr_of_node(node_blk_fast);
> + }
> +
> + return 0;
> +}
> +
> +static int loop_node_chain_fix(struct f2fs_sb_info *sbi,
> + block_t blkaddr_fast, struct f2fs_node *node_blk_fast,
> + block_t blkaddr, struct f2fs_node *node_blk)
> +{
> + block_t blkaddr_entry, blkaddr_tmp;
> + int err;
> +
> + /* find the entry point of the looped node chain */
> + while (blkaddr_fast != blkaddr) {
> + err = dev_read_block(node_blk_fast, blkaddr_fast);
> + if (err)
> + return err;
> + blkaddr_fast = next_blkaddr_of_node(node_blk_fast);
> +
> + err = dev_read_block(node_blk, blkaddr);
> + if (err)
> + return err;
> + blkaddr = next_blkaddr_of_node(node_blk);
> + }
> + blkaddr_entry = blkaddr;
> +
> + /* find the last node of the chain */
> + do {
> + blkaddr_tmp = blkaddr;
> + err = dev_read_block(node_blk, blkaddr);
> + if (err)
> + return err;
> + blkaddr = next_blkaddr_of_node(node_blk);
> + } while (blkaddr != blkaddr_entry);
> +
> + /* fix the blkaddr of last node with NULL_ADDR. */
> + node_blk->footer.next_blkaddr = NULL_ADDR;
> + if (IS_INODE(node_blk))
> + err = write_inode(node_blk, blkaddr_tmp);
> + else
> + err = dev_write_block(node_blk, blkaddr_tmp);
> + if (!err)
> + FIX_MSG("Fix looped node chain on blkaddr %u\n",
> + blkaddr_tmp);
> + return err;
> +}
> +
> static int find_fsync_inode(struct f2fs_sb_info *sbi, struct list_head *head)
> {
> struct curseg_info *curseg;
> - struct f2fs_node *node_blk;
> - block_t blkaddr;
> - unsigned int loop_cnt = 0;
> - unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
> - sbi->total_valid_block_count;
> + struct f2fs_node *node_blk, *node_blk_fast;
> + block_t blkaddr, blkaddr_fast;
> + bool is_detecting = true;
> int err = 0;
>
> + node_blk = calloc(F2FS_BLKSIZE, 1);
> + node_blk_fast = calloc(F2FS_BLKSIZE, 1);
> + ASSERT(node_blk && node_blk_fast);
> +
> /* get node pages in the current segment */
> curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
> blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
> -
> - node_blk = calloc(F2FS_BLKSIZE, 1);
> - ASSERT(node_blk);
> + blkaddr_fast = blkaddr;
>
> while (1) {
> struct fsync_inode_entry *entry;
> @@ -3564,19 +3632,47 @@ static int find_fsync_inode(struct f2fs_sb_info *sbi, struct list_head *head)
> if (IS_INODE(node_blk) && is_dent_dnode(node_blk))
> entry->last_dentry = blkaddr;
> next:
> - /* sanity check in order to detect looped node chain */
> - if (++loop_cnt >= free_blocks ||
> - blkaddr == next_blkaddr_of_node(node_blk)) {
> - MSG(0, "\tdetect looped node chain, blkaddr:%u, next:%u\n",
> - blkaddr,
> - next_blkaddr_of_node(node_blk));
> - err = -1;
> - break;
> - }
> -
> blkaddr = next_blkaddr_of_node(node_blk);
> +
> + /* sanity check to detect looped node chain with Floyd's cycle
> + * detection algorithm
> + */
I added the following early "continue" in place of the long "if (is_detecting)" body below:
if (!is_detecting)
continue;
> + if (is_detecting) {
> + err = find_node_blk_fast(sbi, &blkaddr_fast,
> + node_blk_fast, &is_detecting);
> + if (err)
> + break;
> +
> + if (!is_detecting)
> + continue;
> +
> + if (blkaddr_fast != blkaddr)
> + continue;
> +
> + ASSERT_MSG("\tdetect looped node chain, blkaddr:%u\n",
> + blkaddr);
> +
> + if (!c.fix_on) {
> + err = -1;
> + break;
> + }
> +
> + err = loop_node_chain_fix(sbi,
> + NEXT_FREE_BLKADDR(sbi, curseg),
> + node_blk_fast, blkaddr, node_blk);
> + if (err)
> + break;
> +
> + /* Since we call get_fsync_inode() to ensure there are
> + * no duplicate inodes in the inode_list even if there
> + * are duplicate blkaddr, we can continue running
> + * after fixing the looped node chain.
> + */
> + is_detecting = false;
> + }
> }
>
> + free(node_blk_fast);
> free(node_blk);
> return err;
> }
> --
> 2.25.1
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
prev parent reply other threads:[~2023-05-24 1:29 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-23 10:40 [f2fs-dev] [PATCH v7] fsck.f2fs: Detect and fix looped node chain efficiently Chunhai Guo via Linux-f2fs-devel
2023-05-23 12:36 ` Chao Yu
2023-05-24 1:29 ` Jaegeuk Kim [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZG1obHA7vrFzXx6r@google.com \
--to=jaegeuk@kernel.org \
--cc=frank.li@vivo.com \
--cc=guochunhai@vivo.com \
--cc=linux-f2fs-devel@lists.sourceforge.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.