From: Chao Yu <chao@kernel.org>
To: Daeho Jeong <daeho43@gmail.com>,
linux-kernel@vger.kernel.org,
linux-f2fs-devel@lists.sourceforge.net, kernel-team@android.com
Cc: Jaegeuk Kim <jaegeuk@kernel.org>, Daeho Jeong <daehojeong@google.com>
Subject: Re: [f2fs-dev] [PATCH v3 2/2] f2fs: support file pinning for zoned devices
Date: Fri, 23 Feb 2024 11:23:25 +0800 [thread overview]
Message-ID: <cdf8048e-e37e-4ab7-b6b5-b758ae2dfef4@kernel.org> (raw)
In-Reply-To: <20240213173812.1432663-2-daeho43@gmail.com>
Hi Daeho,
On 2024/2/14 1:38, Daeho Jeong wrote:
> From: Daeho Jeong <daehojeong@google.com>
>
> Support file pinning with conventional storage area for zoned devices
>
> Signed-off-by: Daeho Jeong <daehojeong@google.com>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
> v3: check the hole when migrating blocks for swap.
> do not use the remainder of cold pin section.
> v2: flush previous dirty pages before swapon.
> do not re-check for the last extent of swap area.
> merge this patch with swap file pinning support patch.
> ---
> fs/f2fs/data.c | 58 ++++++++++++++++++++++++++-------------
> fs/f2fs/f2fs.h | 17 +++++++++++-
> fs/f2fs/file.c | 24 ++++++++++++-----
> fs/f2fs/gc.c | 14 +++++++---
> fs/f2fs/segment.c | 69 +++++++++++++++++++++++++++++++++++++++++------
> fs/f2fs/segment.h | 10 +++++++
> 6 files changed, 154 insertions(+), 38 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 828c797cd47c..0c9aa3082fcf 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -3839,25 +3839,34 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
> unsigned int blkofs;
> unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
> unsigned int secidx = start_blk / blk_per_sec;
> - unsigned int end_sec = secidx + blkcnt / blk_per_sec;
> + unsigned int end_sec;
> int ret = 0;
>
> + if (!blkcnt)
> + return 0;
> + end_sec = secidx + (blkcnt - 1) / blk_per_sec;
> +
> f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> filemap_invalidate_lock(inode->i_mapping);
>
> set_inode_flag(inode, FI_ALIGNED_WRITE);
> set_inode_flag(inode, FI_OPU_WRITE);
>
> - for (; secidx < end_sec; secidx++) {
> + for (; secidx <= end_sec; secidx++) {
> + unsigned int blkofs_end = secidx == end_sec ?
> + (blkcnt - 1) % blk_per_sec : blk_per_sec - 1;
Should this be (start_blk + blkcnt - 1) % blk_per_sec instead?
> +
> f2fs_down_write(&sbi->pin_sem);
>
> - f2fs_lock_op(sbi);
> - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
> - f2fs_unlock_op(sbi);
> + ret = f2fs_allocate_pinning_section(sbi);
> + if (ret) {
> + f2fs_up_write(&sbi->pin_sem);
> + break;
> + }
>
> set_inode_flag(inode, FI_SKIP_WRITES);
>
> - for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
> + for (blkofs = 0; blkofs <= blkofs_end; blkofs++) {
> struct page *page;
> unsigned int blkidx = secidx * blk_per_sec + blkofs;
>
> @@ -3946,27 +3955,34 @@ static int check_swap_activate(struct swap_info_struct *sis,
> nr_pblocks = map.m_len;
>
> if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
> - nr_pblocks & sec_blks_mask) {
> + nr_pblocks & sec_blks_mask ||
> + !f2fs_valid_pinned_area(sbi, pblock)) {
> + bool last_extent = false;
> +
> not_aligned++;
>
> nr_pblocks = roundup(nr_pblocks, blks_per_sec);
> if (cur_lblock + nr_pblocks > sis->max)
> nr_pblocks -= blks_per_sec;
>
> + /* this extent is last one */
> if (!nr_pblocks) {
> - /* this extent is last one */
> - nr_pblocks = map.m_len;
> - f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
> - goto next;
> + nr_pblocks = last_lblock - cur_lblock;
> + last_extent = true;
> }
>
> ret = f2fs_migrate_blocks(inode, cur_lblock,
> nr_pblocks);
> - if (ret)
> + if (ret) {
> + if (ret == -ENOENT)
> + ret = -EINVAL;
> goto out;
> - goto retry;
> + }
> +
> + if (!last_extent)
> + goto retry;
> }
> -next:
> +
> if (cur_lblock + nr_pblocks >= sis->max)
> nr_pblocks = sis->max - cur_lblock;
>
> @@ -4004,17 +4020,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> sector_t *span)
> {
> struct inode *inode = file_inode(file);
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> int ret;
>
> if (!S_ISREG(inode->i_mode))
> return -EINVAL;
>
> - if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> + if (f2fs_readonly(sbi->sb))
> return -EROFS;
>
> - if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
> - f2fs_err(F2FS_I_SB(inode),
> - "Swapfile not supported in LFS mode");
> + if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) {
> + f2fs_err(sbi, "Swapfile not supported in LFS mode");
> return -EINVAL;
> }
>
> @@ -4027,13 +4043,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
>
> f2fs_precache_extents(inode);
>
> + ret = filemap_fdatawrite(inode->i_mapping);
> + if (ret < 0)
> + return ret;
What do you think of exchanging the positions of f2fs_precache_extents()
and filemap_fdatawrite(), so that f2fs_precache_extents() can load
extent info after the physical addresses of all data are fixed?
Thanks,
> +
> ret = check_swap_activate(sis, file, span);
> if (ret < 0)
> return ret;
>
> stat_inc_swapfile_inode(inode);
> set_inode_flag(inode, FI_PIN_FILE);
> - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> + f2fs_update_time(sbi, REQ_TIME);
> return ret;
> }
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 40eb590ed646..351133a11518 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3696,7 +3696,8 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi,
> unsigned int *newseg, bool new_sec, int dir);
> void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
> unsigned int start, unsigned int end);
> -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
> +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
> +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi);
> void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
> int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
> bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
> @@ -3870,6 +3871,9 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
> block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
> int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control);
> void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
> +int f2fs_gc_range(struct f2fs_sb_info *sbi,
> + unsigned int start_seg, unsigned int end_seg,
> + bool dry_run, unsigned int dry_run_sections);
> int f2fs_resize_fs(struct file *filp, __u64 block_count);
> int __init f2fs_create_garbage_collection_cache(void);
> void f2fs_destroy_garbage_collection_cache(void);
> @@ -4524,6 +4528,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi)
> return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS;
> }
>
> +static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi,
> + block_t blkaddr)
> +{
> + if (f2fs_sb_has_blkzoned(sbi)) {
> + int devi = f2fs_target_device_index(sbi, blkaddr);
> +
> + return !bdev_is_zoned(FDEV(devi).bdev);
> + }
> + return true;
> +}
> +
> static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi)
> {
> return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW;
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 2c13b340c8a0..21c3aa93a8db 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -1733,9 +1733,11 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
>
> f2fs_down_write(&sbi->pin_sem);
>
> - f2fs_lock_op(sbi);
> - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
> - f2fs_unlock_op(sbi);
> + err = f2fs_allocate_pinning_section(sbi);
> + if (err) {
> + f2fs_up_write(&sbi->pin_sem);
> + goto out_err;
> + }
>
> map.m_seg_type = CURSEG_COLD_DATA_PINNED;
> err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
> @@ -3185,6 +3187,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
> static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
> {
> struct inode *inode = file_inode(filp);
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> __u32 pin;
> int ret = 0;
>
> @@ -3194,7 +3197,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
> if (!S_ISREG(inode->i_mode))
> return -EINVAL;
>
> - if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> + if (f2fs_readonly(sbi->sb))
> return -EROFS;
>
> ret = mnt_want_write_file(filp);
> @@ -3207,9 +3210,18 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
> clear_inode_flag(inode, FI_PIN_FILE);
> f2fs_i_gc_failures_write(inode, 0);
> goto done;
> + } else if (f2fs_is_pinned_file(inode)) {
> + goto done;
> }
>
> - if (f2fs_should_update_outplace(inode, NULL)) {
> + if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) {
> + ret = -EFBIG;
> + goto out;
> + }
> +
> + /* Let's allow file pinning on zoned device. */
> + if (!f2fs_sb_has_blkzoned(sbi) &&
> + f2fs_should_update_outplace(inode, NULL)) {
> ret = -EINVAL;
> goto out;
> }
> @@ -3231,7 +3243,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
> set_inode_flag(inode, FI_PIN_FILE);
> ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
> done:
> - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> + f2fs_update_time(sbi, REQ_TIME);
> out:
> inode_unlock(inode);
> mnt_drop_write_file(filp);
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index a089a938355b..3ff126316d42 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -1961,10 +1961,12 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
> init_atgc_management(sbi);
> }
>
> -static int f2fs_gc_range(struct f2fs_sb_info *sbi,
> - unsigned int start_seg, unsigned int end_seg, bool dry_run)
> +int f2fs_gc_range(struct f2fs_sb_info *sbi,
> + unsigned int start_seg, unsigned int end_seg,
> + bool dry_run, unsigned int dry_run_sections)
> {
> unsigned int segno;
> + unsigned int gc_secs = dry_run_sections;
>
> for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) {
> struct gc_inode_list gc_list = {
> @@ -1972,11 +1974,15 @@ static int f2fs_gc_range(struct f2fs_sb_info *sbi,
> .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
> };
>
> - do_garbage_collect(sbi, segno, &gc_list, FG_GC, true);
> + do_garbage_collect(sbi, segno, &gc_list, FG_GC,
> + dry_run_sections == 0);
> put_gc_inode(&gc_list);
>
> if (!dry_run && get_valid_blocks(sbi, segno, true))
> return -EAGAIN;
> + if (dry_run && dry_run_sections &&
> + !get_valid_blocks(sbi, segno, true) && --gc_secs == 0)
> + break;
>
> if (fatal_signal_pending(current))
> return -ERESTARTSYS;
> @@ -2014,7 +2020,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
> f2fs_allocate_segment_for_resize(sbi, type, start, end);
>
> /* do GC to move out valid blocks in the range */
> - err = f2fs_gc_range(sbi, start, end, dry_run);
> + err = f2fs_gc_range(sbi, start, end, dry_run, 0);
> if (err || dry_run)
> goto out;
>
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 4e985750c938..0b72c8536ccf 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -2632,7 +2632,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi,
> * This function should be returned with success, otherwise BUG
> */
> static void get_new_segment(struct f2fs_sb_info *sbi,
> - unsigned int *newseg, bool new_sec)
> + unsigned int *newseg, bool new_sec, bool pinning)
> {
> struct free_segmap_info *free_i = FREE_I(sbi);
> unsigned int segno, secno, zoneno;
> @@ -2650,6 +2650,16 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
> if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
> goto got_it;
> }
> +
> + /*
> + * If we format f2fs on zoned storage, let's try to get pinned sections
> + * from beginning of the storage, which should be a conventional one.
> + */
> + if (f2fs_sb_has_blkzoned(sbi)) {
> + segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg);
> + hint = GET_SEC_FROM_SEG(sbi, segno);
> + }
> +
> find_other_zone:
> secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
> if (secno >= MAIN_SECS(sbi)) {
> @@ -2749,21 +2759,30 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> * Allocate a current working segment.
> * This function always allocates a free segment in LFS manner.
> */
> -static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
> +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
> {
> struct curseg_info *curseg = CURSEG_I(sbi, type);
> unsigned int segno = curseg->segno;
> + bool pinning = type == CURSEG_COLD_DATA_PINNED;
>
> if (curseg->inited)
> write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno));
> +
> segno = __get_next_segno(sbi, type);
> - get_new_segment(sbi, &segno, new_sec);
> + get_new_segment(sbi, &segno, new_sec, pinning);
> + if (new_sec && pinning &&
> + !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) {
> + __set_free(sbi, segno);
> + return -EAGAIN;
> + }
> +
> curseg->next_segno = segno;
> reset_curseg(sbi, type, 1);
> curseg->alloc_type = LFS;
> if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
> curseg->fragment_remained_chunk =
> get_random_u32_inclusive(1, sbi->max_fragment_chunk);
> + return 0;
> }
>
> static int __next_free_blkoff(struct f2fs_sb_info *sbi,
> @@ -3036,7 +3055,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
> f2fs_up_read(&SM_I(sbi)->curseg_lock);
> }
>
> -static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> +static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> bool new_sec, bool force)
> {
> struct curseg_info *curseg = CURSEG_I(sbi, type);
> @@ -3046,21 +3065,49 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> !curseg->next_blkoff &&
> !get_valid_blocks(sbi, curseg->segno, new_sec) &&
> !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
> - return;
> + return 0;
>
> old_segno = curseg->segno;
> - new_curseg(sbi, type, true);
> + if (new_curseg(sbi, type, true))
> + return -EAGAIN;
> stat_inc_seg_type(sbi, curseg);
> locate_dirty_segment(sbi, old_segno);
> + return 0;
> }
>
> -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
> +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
> {
> + int ret;
> +
> f2fs_down_read(&SM_I(sbi)->curseg_lock);
> down_write(&SIT_I(sbi)->sentry_lock);
> - __allocate_new_segment(sbi, type, true, force);
> + ret = __allocate_new_segment(sbi, type, true, force);
> up_write(&SIT_I(sbi)->sentry_lock);
> f2fs_up_read(&SM_I(sbi)->curseg_lock);
> +
> + return ret;
> +}
> +
> +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi)
> +{
> + int err;
> + bool gc_required = true;
> +
> +retry:
> + f2fs_lock_op(sbi);
> + err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
> + f2fs_unlock_op(sbi);
> +
> + if (f2fs_sb_has_blkzoned(sbi) && err && gc_required) {
> + f2fs_down_write(&sbi->gc_lock);
> + f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1);
> + f2fs_up_write(&sbi->gc_lock);
> +
> + gc_required = false;
> + goto retry;
> + }
> +
> + return err;
> }
>
> void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
> @@ -3426,6 +3473,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> * new segment.
> */
> if (segment_full) {
> + if (type == CURSEG_COLD_DATA_PINNED &&
> + !((curseg->segno + 1) % sbi->segs_per_sec))
> + goto skip_new_segment;
> +
> if (from_gc) {
> get_atssr_segment(sbi, type, se->type,
> AT_SSR, se->mtime);
> @@ -3437,6 +3488,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> stat_inc_seg_type(sbi, curseg);
> }
> }
> +
> +skip_new_segment:
> /*
> * segment dirty status should be updated after segment allocation,
> * so we just need to update status only one time after previous
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 60d93a16f2ac..953af072915f 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -942,3 +942,13 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
> dcc->discard_wake = true;
> wake_up_interruptible_all(&dcc->discard_wait_queue);
> }
> +
> +static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
> +{
> + int devi;
> +
> + for (devi = 0; devi < sbi->s_ndevs; devi++)
> + if (bdev_is_zoned(FDEV(devi).bdev))
> + return GET_SEGNO(sbi, FDEV(devi).start_blk);
> + return 0;
> +}
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
WARNING: multiple messages have this Message-ID (diff)
From: Chao Yu <chao@kernel.org>
To: Daeho Jeong <daeho43@gmail.com>,
linux-kernel@vger.kernel.org,
linux-f2fs-devel@lists.sourceforge.net, kernel-team@android.com
Cc: Jaegeuk Kim <jaegeuk@kernel.org>, Daeho Jeong <daehojeong@google.com>
Subject: Re: [f2fs-dev] [PATCH v3 2/2] f2fs: support file pinning for zoned devices
Date: Fri, 23 Feb 2024 11:23:25 +0800 [thread overview]
Message-ID: <cdf8048e-e37e-4ab7-b6b5-b758ae2dfef4@kernel.org> (raw)
In-Reply-To: <20240213173812.1432663-2-daeho43@gmail.com>
Hi Daeho,
On 2024/2/14 1:38, Daeho Jeong wrote:
> From: Daeho Jeong <daehojeong@google.com>
>
> Support file pinning with conventional storage area for zoned devices
>
> Signed-off-by: Daeho Jeong <daehojeong@google.com>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
> v3: check the hole when migrating blocks for swap.
> do not use the remainder of cold pin section.
> v2: flush previous dirty pages before swapon.
> do not re-check for the last extent of swap area.
> merge this patch with swap file pinning support patch.
> ---
> fs/f2fs/data.c | 58 ++++++++++++++++++++++++++-------------
> fs/f2fs/f2fs.h | 17 +++++++++++-
> fs/f2fs/file.c | 24 ++++++++++++-----
> fs/f2fs/gc.c | 14 +++++++---
> fs/f2fs/segment.c | 69 +++++++++++++++++++++++++++++++++++++++++------
> fs/f2fs/segment.h | 10 +++++++
> 6 files changed, 154 insertions(+), 38 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 828c797cd47c..0c9aa3082fcf 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -3839,25 +3839,34 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
> unsigned int blkofs;
> unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
> unsigned int secidx = start_blk / blk_per_sec;
> - unsigned int end_sec = secidx + blkcnt / blk_per_sec;
> + unsigned int end_sec;
> int ret = 0;
>
> + if (!blkcnt)
> + return 0;
> + end_sec = secidx + (blkcnt - 1) / blk_per_sec;
> +
> f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> filemap_invalidate_lock(inode->i_mapping);
>
> set_inode_flag(inode, FI_ALIGNED_WRITE);
> set_inode_flag(inode, FI_OPU_WRITE);
>
> - for (; secidx < end_sec; secidx++) {
> + for (; secidx <= end_sec; secidx++) {
> + unsigned int blkofs_end = secidx == end_sec ?
> + (blkcnt - 1) % blk_per_sec : blk_per_sec - 1;
Should this be (start_blk + blkcnt - 1) % blk_per_sec instead?
> +
> f2fs_down_write(&sbi->pin_sem);
>
> - f2fs_lock_op(sbi);
> - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
> - f2fs_unlock_op(sbi);
> + ret = f2fs_allocate_pinning_section(sbi);
> + if (ret) {
> + f2fs_up_write(&sbi->pin_sem);
> + break;
> + }
>
> set_inode_flag(inode, FI_SKIP_WRITES);
>
> - for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
> + for (blkofs = 0; blkofs <= blkofs_end; blkofs++) {
> struct page *page;
> unsigned int blkidx = secidx * blk_per_sec + blkofs;
>
> @@ -3946,27 +3955,34 @@ static int check_swap_activate(struct swap_info_struct *sis,
> nr_pblocks = map.m_len;
>
> if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
> - nr_pblocks & sec_blks_mask) {
> + nr_pblocks & sec_blks_mask ||
> + !f2fs_valid_pinned_area(sbi, pblock)) {
> + bool last_extent = false;
> +
> not_aligned++;
>
> nr_pblocks = roundup(nr_pblocks, blks_per_sec);
> if (cur_lblock + nr_pblocks > sis->max)
> nr_pblocks -= blks_per_sec;
>
> + /* this extent is last one */
> if (!nr_pblocks) {
> - /* this extent is last one */
> - nr_pblocks = map.m_len;
> - f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
> - goto next;
> + nr_pblocks = last_lblock - cur_lblock;
> + last_extent = true;
> }
>
> ret = f2fs_migrate_blocks(inode, cur_lblock,
> nr_pblocks);
> - if (ret)
> + if (ret) {
> + if (ret == -ENOENT)
> + ret = -EINVAL;
> goto out;
> - goto retry;
> + }
> +
> + if (!last_extent)
> + goto retry;
> }
> -next:
> +
> if (cur_lblock + nr_pblocks >= sis->max)
> nr_pblocks = sis->max - cur_lblock;
>
> @@ -4004,17 +4020,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> sector_t *span)
> {
> struct inode *inode = file_inode(file);
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> int ret;
>
> if (!S_ISREG(inode->i_mode))
> return -EINVAL;
>
> - if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> + if (f2fs_readonly(sbi->sb))
> return -EROFS;
>
> - if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
> - f2fs_err(F2FS_I_SB(inode),
> - "Swapfile not supported in LFS mode");
> + if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) {
> + f2fs_err(sbi, "Swapfile not supported in LFS mode");
> return -EINVAL;
> }
>
> @@ -4027,13 +4043,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
>
> f2fs_precache_extents(inode);
>
> + ret = filemap_fdatawrite(inode->i_mapping);
> + if (ret < 0)
> + return ret;
What do you think of exchanging the positions of f2fs_precache_extents()
and filemap_fdatawrite(), so that f2fs_precache_extents() can load
extent info after the physical addresses of all data are fixed?
Thanks,
> +
> ret = check_swap_activate(sis, file, span);
> if (ret < 0)
> return ret;
>
> stat_inc_swapfile_inode(inode);
> set_inode_flag(inode, FI_PIN_FILE);
> - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> + f2fs_update_time(sbi, REQ_TIME);
> return ret;
> }
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 40eb590ed646..351133a11518 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3696,7 +3696,8 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi,
> unsigned int *newseg, bool new_sec, int dir);
> void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
> unsigned int start, unsigned int end);
> -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
> +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
> +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi);
> void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
> int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
> bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
> @@ -3870,6 +3871,9 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
> block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
> int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control);
> void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
> +int f2fs_gc_range(struct f2fs_sb_info *sbi,
> + unsigned int start_seg, unsigned int end_seg,
> + bool dry_run, unsigned int dry_run_sections);
> int f2fs_resize_fs(struct file *filp, __u64 block_count);
> int __init f2fs_create_garbage_collection_cache(void);
> void f2fs_destroy_garbage_collection_cache(void);
> @@ -4524,6 +4528,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi)
> return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS;
> }
>
> +static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi,
> + block_t blkaddr)
> +{
> + if (f2fs_sb_has_blkzoned(sbi)) {
> + int devi = f2fs_target_device_index(sbi, blkaddr);
> +
> + return !bdev_is_zoned(FDEV(devi).bdev);
> + }
> + return true;
> +}
> +
> static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi)
> {
> return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW;
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 2c13b340c8a0..21c3aa93a8db 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -1733,9 +1733,11 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
>
> f2fs_down_write(&sbi->pin_sem);
>
> - f2fs_lock_op(sbi);
> - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
> - f2fs_unlock_op(sbi);
> + err = f2fs_allocate_pinning_section(sbi);
> + if (err) {
> + f2fs_up_write(&sbi->pin_sem);
> + goto out_err;
> + }
>
> map.m_seg_type = CURSEG_COLD_DATA_PINNED;
> err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
> @@ -3185,6 +3187,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
> static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
> {
> struct inode *inode = file_inode(filp);
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> __u32 pin;
> int ret = 0;
>
> @@ -3194,7 +3197,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
> if (!S_ISREG(inode->i_mode))
> return -EINVAL;
>
> - if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> + if (f2fs_readonly(sbi->sb))
> return -EROFS;
>
> ret = mnt_want_write_file(filp);
> @@ -3207,9 +3210,18 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
> clear_inode_flag(inode, FI_PIN_FILE);
> f2fs_i_gc_failures_write(inode, 0);
> goto done;
> + } else if (f2fs_is_pinned_file(inode)) {
> + goto done;
> }
>
> - if (f2fs_should_update_outplace(inode, NULL)) {
> + if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) {
> + ret = -EFBIG;
> + goto out;
> + }
> +
> + /* Let's allow file pinning on zoned device. */
> + if (!f2fs_sb_has_blkzoned(sbi) &&
> + f2fs_should_update_outplace(inode, NULL)) {
> ret = -EINVAL;
> goto out;
> }
> @@ -3231,7 +3243,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
> set_inode_flag(inode, FI_PIN_FILE);
> ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
> done:
> - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> + f2fs_update_time(sbi, REQ_TIME);
> out:
> inode_unlock(inode);
> mnt_drop_write_file(filp);
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index a089a938355b..3ff126316d42 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -1961,10 +1961,12 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
> init_atgc_management(sbi);
> }
>
> -static int f2fs_gc_range(struct f2fs_sb_info *sbi,
> - unsigned int start_seg, unsigned int end_seg, bool dry_run)
> +int f2fs_gc_range(struct f2fs_sb_info *sbi,
> + unsigned int start_seg, unsigned int end_seg,
> + bool dry_run, unsigned int dry_run_sections)
> {
> unsigned int segno;
> + unsigned int gc_secs = dry_run_sections;
>
> for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) {
> struct gc_inode_list gc_list = {
> @@ -1972,11 +1974,15 @@ static int f2fs_gc_range(struct f2fs_sb_info *sbi,
> .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
> };
>
> - do_garbage_collect(sbi, segno, &gc_list, FG_GC, true);
> + do_garbage_collect(sbi, segno, &gc_list, FG_GC,
> + dry_run_sections == 0);
> put_gc_inode(&gc_list);
>
> if (!dry_run && get_valid_blocks(sbi, segno, true))
> return -EAGAIN;
> + if (dry_run && dry_run_sections &&
> + !get_valid_blocks(sbi, segno, true) && --gc_secs == 0)
> + break;
>
> if (fatal_signal_pending(current))
> return -ERESTARTSYS;
> @@ -2014,7 +2020,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
> f2fs_allocate_segment_for_resize(sbi, type, start, end);
>
> /* do GC to move out valid blocks in the range */
> - err = f2fs_gc_range(sbi, start, end, dry_run);
> + err = f2fs_gc_range(sbi, start, end, dry_run, 0);
> if (err || dry_run)
> goto out;
>
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 4e985750c938..0b72c8536ccf 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -2632,7 +2632,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi,
> * This function should be returned with success, otherwise BUG
> */
> static void get_new_segment(struct f2fs_sb_info *sbi,
> - unsigned int *newseg, bool new_sec)
> + unsigned int *newseg, bool new_sec, bool pinning)
> {
> struct free_segmap_info *free_i = FREE_I(sbi);
> unsigned int segno, secno, zoneno;
> @@ -2650,6 +2650,16 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
> if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
> goto got_it;
> }
> +
> + /*
> + * If we format f2fs on zoned storage, let's try to get pinned sections
> + * from beginning of the storage, which should be a conventional one.
> + */
> + if (f2fs_sb_has_blkzoned(sbi)) {
> + segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg);
> + hint = GET_SEC_FROM_SEG(sbi, segno);
> + }
> +
> find_other_zone:
> secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
> if (secno >= MAIN_SECS(sbi)) {
> @@ -2749,21 +2759,30 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> * Allocate a current working segment.
> * This function always allocates a free segment in LFS manner.
> */
> -static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
> +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
> {
> struct curseg_info *curseg = CURSEG_I(sbi, type);
> unsigned int segno = curseg->segno;
> + bool pinning = type == CURSEG_COLD_DATA_PINNED;
>
> if (curseg->inited)
> write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno));
> +
> segno = __get_next_segno(sbi, type);
> - get_new_segment(sbi, &segno, new_sec);
> + get_new_segment(sbi, &segno, new_sec, pinning);
> + if (new_sec && pinning &&
> + !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) {
> + __set_free(sbi, segno);
> + return -EAGAIN;
> + }
> +
> curseg->next_segno = segno;
> reset_curseg(sbi, type, 1);
> curseg->alloc_type = LFS;
> if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
> curseg->fragment_remained_chunk =
> get_random_u32_inclusive(1, sbi->max_fragment_chunk);
> + return 0;
> }
>
> static int __next_free_blkoff(struct f2fs_sb_info *sbi,
> @@ -3036,7 +3055,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
> f2fs_up_read(&SM_I(sbi)->curseg_lock);
> }
>
> -static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> +static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> bool new_sec, bool force)
> {
> struct curseg_info *curseg = CURSEG_I(sbi, type);
> @@ -3046,21 +3065,49 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> !curseg->next_blkoff &&
> !get_valid_blocks(sbi, curseg->segno, new_sec) &&
> !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
> - return;
> + return 0;
>
> old_segno = curseg->segno;
> - new_curseg(sbi, type, true);
> + if (new_curseg(sbi, type, true))
> + return -EAGAIN;
> stat_inc_seg_type(sbi, curseg);
> locate_dirty_segment(sbi, old_segno);
> + return 0;
> }
>
> -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
> +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
> {
> + int ret;
> +
> f2fs_down_read(&SM_I(sbi)->curseg_lock);
> down_write(&SIT_I(sbi)->sentry_lock);
> - __allocate_new_segment(sbi, type, true, force);
> + ret = __allocate_new_segment(sbi, type, true, force);
> up_write(&SIT_I(sbi)->sentry_lock);
> f2fs_up_read(&SM_I(sbi)->curseg_lock);
> +
> + return ret;
> +}
> +
> +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi)
> +{
> + int err;
> + bool gc_required = true;
> +
> +retry:
> + f2fs_lock_op(sbi);
> + err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
> + f2fs_unlock_op(sbi);
> +
> + if (f2fs_sb_has_blkzoned(sbi) && err && gc_required) {
> + f2fs_down_write(&sbi->gc_lock);
> + f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1);
> + f2fs_up_write(&sbi->gc_lock);
> +
> + gc_required = false;
> + goto retry;
> + }
> +
> + return err;
> }
>
> void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
> @@ -3426,6 +3473,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> * new segment.
> */
> if (segment_full) {
> + if (type == CURSEG_COLD_DATA_PINNED &&
> + !((curseg->segno + 1) % sbi->segs_per_sec))
> + goto skip_new_segment;
> +
> if (from_gc) {
> get_atssr_segment(sbi, type, se->type,
> AT_SSR, se->mtime);
> @@ -3437,6 +3488,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> stat_inc_seg_type(sbi, curseg);
> }
> }
> +
> +skip_new_segment:
> /*
> * segment dirty status should be updated after segment allocation,
> * so we just need to update status only one time after previous
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 60d93a16f2ac..953af072915f 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -942,3 +942,13 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
> dcc->discard_wake = true;
> wake_up_interruptible_all(&dcc->discard_wait_queue);
> }
> +
> +static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
> +{
> + int devi;
> +
> + for (devi = 0; devi < sbi->s_ndevs; devi++)
> + if (bdev_is_zoned(FDEV(devi).bdev))
> + return GET_SEGNO(sbi, FDEV(devi).start_blk);
> + return 0;
> +}
next prev parent reply other threads:[~2024-02-23 3:23 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-13 17:38 [f2fs-dev] [PATCH v3 1/2] f2fs: separate f2fs_gc_range() to use GC for a range Daeho Jeong
2024-02-13 17:38 ` Daeho Jeong
2024-02-13 17:38 ` [f2fs-dev] [PATCH v3 2/2] f2fs: support file pinning for zoned devices Daeho Jeong
2024-02-13 17:38 ` Daeho Jeong
2024-02-23 3:23 ` Chao Yu [this message]
2024-02-23 3:23 ` [f2fs-dev] " Chao Yu
2024-02-23 3:43 ` Chao Yu
2024-02-23 3:43 ` Chao Yu
2024-02-23 3:52 ` Chao Yu
2024-02-23 3:52 ` Chao Yu
2024-02-23 17:31 ` Jaegeuk Kim
2024-02-23 17:31 ` Jaegeuk Kim
2024-02-25 6:30 ` Chao Yu
2024-02-25 6:30 ` Chao Yu
2024-02-20 8:37 ` [f2fs-dev] [PATCH v3 1/2] f2fs: separate f2fs_gc_range() to use GC for a range Chao Yu
2024-02-20 8:37 ` Chao Yu
2024-02-21 18:10 ` patchwork-bot+f2fs
2024-02-21 18:10 ` patchwork-bot+f2fs
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=cdf8048e-e37e-4ab7-b6b5-b758ae2dfef4@kernel.org \
--to=chao@kernel.org \
--cc=daeho43@gmail.com \
--cc=daehojeong@google.com \
--cc=jaegeuk@kernel.org \
--cc=kernel-team@android.com \
--cc=linux-f2fs-devel@lists.sourceforge.net \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.