From: Jaegeuk Kim <jaegeuk@kernel.org>
To: Yonggil Song <yonggil.song@samsung.com>
Cc: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
"linux-f2fs-devel@lists.sourceforge.net"
<linux-f2fs-devel@lists.sourceforge.net>
Subject: Re: [f2fs-dev] [PATCH v1] f2fs: Fix system crash due to lack of free space in LFS
Date: Thu, 16 Mar 2023 10:13:14 -0700 [thread overview]
Message-ID: <ZBNOKq/EYNMnMSFi@google.com> (raw)
In-Reply-To: <20230314074733epcms2p511d7a7fa11d5b54ac2fbaa840db3f1cb@epcms2p5>
On 03/14, Yonggil Song wrote:
> When f2fs tries to checkpoint during foreground gc in LFS mode, a system
> crash occurs due to a lack of free space if the number of dirty node and
> dentry pages generated by data migration exceeds the free space.
> The reproduction sequence is as follows.
>
> - 20GiB capacity block device (null_blk)
> - format and mount with LFS mode
> - create a file and write 20,000MiB
> - 4k random write on full range of the file
>
> RIP: 0010:new_curseg+0x48a/0x510 [f2fs]
> Code: 55 e7 f5 89 c0 48 0f af c3 48 8b 5d c0 48 c1 e8 20 83 c0 01 89 43 6c 48 83 c4 28 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc <0f> 0b f0 41 80 4f 48 04 45 85 f6 0f 84 ba fd ff ff e9 ef fe ff ff
> RSP: 0018:ffff977bc397b218 EFLAGS: 00010246
> RAX: 00000000000027b9 RBX: 0000000000000000 RCX: 00000000000027c0
> RDX: 0000000000000000 RSI: 00000000000027b9 RDI: ffff8c25ab4e74f8
> RBP: ffff977bc397b268 R08: 00000000000027b9 R09: ffff8c29e4a34b40
> R10: 0000000000000001 R11: ffff977bc397b0d8 R12: 0000000000000000
> R13: ffff8c25b4dd81a0 R14: 0000000000000000 R15: ffff8c2f667f9000
> FS: 0000000000000000(0000) GS:ffff8c344ec80000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 000000c00055d000 CR3: 0000000e30810003 CR4: 00000000003706e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> Call Trace:
> <TASK>
> allocate_segment_by_default+0x9c/0x110 [f2fs]
> f2fs_allocate_data_block+0x243/0xa30 [f2fs]
> ? __mod_lruvec_page_state+0xa0/0x150
> do_write_page+0x80/0x160 [f2fs]
> f2fs_do_write_node_page+0x32/0x50 [f2fs]
> __write_node_page+0x339/0x730 [f2fs]
> f2fs_sync_node_pages+0x5a6/0x780 [f2fs]
> block_operations+0x257/0x340 [f2fs]
> f2fs_write_checkpoint+0x102/0x1050 [f2fs]
> f2fs_gc+0x27c/0x630 [f2fs]
> ? folio_mark_dirty+0x36/0x70
> f2fs_balance_fs+0x16f/0x180 [f2fs]
>
> This patch adds a check for whether there are enough free sections before
> writing a checkpoint during gc.
>
> Signed-off-by: Yonggil Song <yonggil.song@samsung.com>
> ---
> fs/f2fs/gc.c | 7 ++++++-
> fs/f2fs/segment.h | 26 +++++++++++++++++++++-----
> 2 files changed, 27 insertions(+), 6 deletions(-)
>
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index 4546e01b2ee0..b22f49a6f128 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -1773,6 +1773,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
> .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
> };
> unsigned int skipped_round = 0, round = 0;
> + unsigned int nr_needed_secs = 0, node_blocks = 0, dent_blocks = 0;
>
> trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
> gc_control->nr_free_secs,
> @@ -1858,8 +1859,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
> }
> }
>
> + /* need three more sections for the writer's data/node/dentry */
> + nr_needed_secs = get_min_need_secs(sbi, &node_blocks, &dent_blocks) + 3;
get_min_need_secs(&lower, &upper)
{
...
*lower = node_secs + dent_secs;
*upper = *lower + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0);
}
> + nr_needed_secs += ((node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0));
> +
> /* Write checkpoint to reclaim prefree segments */
> - if (free_sections(sbi) < NR_CURSEG_PERSIST_TYPE &&
> + if (free_sections(sbi) <= nr_needed_secs &&
#define NR_GC_CHECKPOINT_SECS (3) /* data/node/dentry sections */
if (free_sections(sbi) <= upper + NR_GC_CHECKPOINT_SECS &&
> prefree_segments(sbi)) {
> ret = f2fs_write_checkpoint(sbi, &cpc);
> if (ret)
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index be8f2d7d007b..ac11c47bfe37 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -605,8 +605,11 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
> return true;
> }
>
> -static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
> - int freed, int needed)
> +/*
> + * calculate the minimum number of sections (needed) for dirty node/dentry
> + */
> +static inline unsigned int get_min_need_secs(struct f2fs_sb_info *sbi,
> + unsigned int *node_blocks, unsigned int *dent_blocks)
> {
> unsigned int total_node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
> get_pages(sbi, F2FS_DIRTY_DENTS) +
> @@ -614,15 +617,28 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
> unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
> unsigned int node_secs = total_node_blocks / CAP_BLKS_PER_SEC(sbi);
> unsigned int dent_secs = total_dent_blocks / CAP_BLKS_PER_SEC(sbi);
> - unsigned int node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi);
> - unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi);
> +
> + f2fs_bug_on(sbi, (!node_blocks || !dent_blocks));
> +
> + *node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi);
> + *dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi);
> +
> + return (node_secs + dent_secs);
> +}
> +
> +static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
> + int freed, int needed)
> +{
> + unsigned int node_blocks = 0;
> + unsigned int dent_blocks = 0;
> unsigned int free, need_lower, need_upper;
>
> if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
> return false;
>
> free = free_sections(sbi) + freed;
> - need_lower = node_secs + dent_secs + reserved_sections(sbi) + needed;
> + need_lower = get_min_need_secs(sbi, &node_blocks, &dent_blocks) + needed +
> + reserved_sections(sbi);
> need_upper = need_lower + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0);
>
> if (free > need_upper)
> --
> 2.34.1
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
WARNING: multiple messages have this Message-ID (diff)
From: Jaegeuk Kim <jaegeuk@kernel.org>
To: Yonggil Song <yonggil.song@samsung.com>
Cc: "chao@kernel.org" <chao@kernel.org>,
"linux-f2fs-devel@lists.sourceforge.net"
<linux-f2fs-devel@lists.sourceforge.net>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v1] f2fs: Fix system crash due to lack of free space in LFS
Date: Thu, 16 Mar 2023 10:13:14 -0700 [thread overview]
Message-ID: <ZBNOKq/EYNMnMSFi@google.com> (raw)
In-Reply-To: <20230314074733epcms2p511d7a7fa11d5b54ac2fbaa840db3f1cb@epcms2p5>
On 03/14, Yonggil Song wrote:
> When f2fs tries to checkpoint during foreground gc in LFS mode, a system
> crash occurs due to a lack of free space if the number of dirty node and
> dentry pages generated by data migration exceeds the free space.
> The reproduction sequence is as follows.
>
> - 20GiB capacity block device (null_blk)
> - format and mount with LFS mode
> - create a file and write 20,000MiB
> - 4k random write on full range of the file
>
> RIP: 0010:new_curseg+0x48a/0x510 [f2fs]
> Code: 55 e7 f5 89 c0 48 0f af c3 48 8b 5d c0 48 c1 e8 20 83 c0 01 89 43 6c 48 83 c4 28 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc <0f> 0b f0 41 80 4f 48 04 45 85 f6 0f 84 ba fd ff ff e9 ef fe ff ff
> RSP: 0018:ffff977bc397b218 EFLAGS: 00010246
> RAX: 00000000000027b9 RBX: 0000000000000000 RCX: 00000000000027c0
> RDX: 0000000000000000 RSI: 00000000000027b9 RDI: ffff8c25ab4e74f8
> RBP: ffff977bc397b268 R08: 00000000000027b9 R09: ffff8c29e4a34b40
> R10: 0000000000000001 R11: ffff977bc397b0d8 R12: 0000000000000000
> R13: ffff8c25b4dd81a0 R14: 0000000000000000 R15: ffff8c2f667f9000
> FS: 0000000000000000(0000) GS:ffff8c344ec80000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 000000c00055d000 CR3: 0000000e30810003 CR4: 00000000003706e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> Call Trace:
> <TASK>
> allocate_segment_by_default+0x9c/0x110 [f2fs]
> f2fs_allocate_data_block+0x243/0xa30 [f2fs]
> ? __mod_lruvec_page_state+0xa0/0x150
> do_write_page+0x80/0x160 [f2fs]
> f2fs_do_write_node_page+0x32/0x50 [f2fs]
> __write_node_page+0x339/0x730 [f2fs]
> f2fs_sync_node_pages+0x5a6/0x780 [f2fs]
> block_operations+0x257/0x340 [f2fs]
> f2fs_write_checkpoint+0x102/0x1050 [f2fs]
> f2fs_gc+0x27c/0x630 [f2fs]
> ? folio_mark_dirty+0x36/0x70
> f2fs_balance_fs+0x16f/0x180 [f2fs]
>
> This patch adds a check for whether there are enough free sections before
> writing a checkpoint during gc.
>
> Signed-off-by: Yonggil Song <yonggil.song@samsung.com>
> ---
> fs/f2fs/gc.c | 7 ++++++-
> fs/f2fs/segment.h | 26 +++++++++++++++++++++-----
> 2 files changed, 27 insertions(+), 6 deletions(-)
>
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index 4546e01b2ee0..b22f49a6f128 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -1773,6 +1773,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
> .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
> };
> unsigned int skipped_round = 0, round = 0;
> + unsigned int nr_needed_secs = 0, node_blocks = 0, dent_blocks = 0;
>
> trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
> gc_control->nr_free_secs,
> @@ -1858,8 +1859,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
> }
> }
>
> + /* need three more sections for the writer's data/node/dentry */
> + nr_needed_secs = get_min_need_secs(sbi, &node_blocks, &dent_blocks) + 3;
get_min_need_secs(&lower, &upper)
{
...
*lower = node_secs + dent_secs;
*upper = *lower + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0);
}
> + nr_needed_secs += ((node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0));
> +
> /* Write checkpoint to reclaim prefree segments */
> - if (free_sections(sbi) < NR_CURSEG_PERSIST_TYPE &&
> + if (free_sections(sbi) <= nr_needed_secs &&
#define NR_GC_CHECKPOINT_SECS (3) /* data/node/dentry sections */
if (free_sections(sbi) <= upper + NR_GC_CHECKPOINT_SECS &&
> prefree_segments(sbi)) {
> ret = f2fs_write_checkpoint(sbi, &cpc);
> if (ret)
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index be8f2d7d007b..ac11c47bfe37 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -605,8 +605,11 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
> return true;
> }
>
> -static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
> - int freed, int needed)
> +/*
> + * calculate the minimum number of sections (needed) for dirty node/dentry
> + */
> +static inline unsigned int get_min_need_secs(struct f2fs_sb_info *sbi,
> + unsigned int *node_blocks, unsigned int *dent_blocks)
> {
> unsigned int total_node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
> get_pages(sbi, F2FS_DIRTY_DENTS) +
> @@ -614,15 +617,28 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
> unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
> unsigned int node_secs = total_node_blocks / CAP_BLKS_PER_SEC(sbi);
> unsigned int dent_secs = total_dent_blocks / CAP_BLKS_PER_SEC(sbi);
> - unsigned int node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi);
> - unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi);
> +
> + f2fs_bug_on(sbi, (!node_blocks || !dent_blocks));
> +
> + *node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi);
> + *dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi);
> +
> + return (node_secs + dent_secs);
> +}
> +
> +static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
> + int freed, int needed)
> +{
> + unsigned int node_blocks = 0;
> + unsigned int dent_blocks = 0;
> unsigned int free, need_lower, need_upper;
>
> if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
> return false;
>
> free = free_sections(sbi) + freed;
> - need_lower = node_secs + dent_secs + reserved_sections(sbi) + needed;
> + need_lower = get_min_need_secs(sbi, &node_blocks, &dent_blocks) + needed +
> + reserved_sections(sbi);
> need_upper = need_lower + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0);
>
> if (free > need_upper)
> --
> 2.34.1
next prev parent reply other threads:[~2023-03-16 17:13 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <CGME20230314074733epcms2p511d7a7fa11d5b54ac2fbaa840db3f1cb@epcms2p5>
2023-03-14 7:47 ` [f2fs-dev] [PATCH v1] f2fs: Fix system crash due to lack of free space in LFS Yonggil Song
2023-03-14 7:47 ` Yonggil Song
2023-03-16 17:13 ` Jaegeuk Kim [this message]
2023-03-16 17:13 ` Jaegeuk Kim
2023-03-17 2:02 ` [f2fs-dev] (2) " Yonggil Song
2023-03-17 2:02 ` Yonggil Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZBNOKq/EYNMnMSFi@google.com \
--to=jaegeuk@kernel.org \
--cc=linux-f2fs-devel@lists.sourceforge.net \
--cc=linux-kernel@vger.kernel.org \
--cc=yonggil.song@samsung.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.