linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] f2fs: register inodes which is able to donate pages
@ 2025-01-13 18:39 Jaegeuk Kim
  2025-01-13 18:39 ` [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages Jaegeuk Kim
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Jaegeuk Kim @ 2025-01-13 18:39 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch introduces an inode list that tracks, for each registered inode,
the page cache range whose pages the user is willing to donate.

 #define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
						struct f2fs_donate_range)
 struct f2fs_donate_range {
	__u64 start;
	__u64 len;
 };

e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c           |  3 +++
 fs/f2fs/f2fs.h            |  9 +++++++-
 fs/f2fs/file.c            | 48 +++++++++++++++++++++++++++++++++++++++
 fs/f2fs/inode.c           | 14 ++++++++++++
 fs/f2fs/super.c           |  1 +
 include/uapi/linux/f2fs.h |  7 ++++++
 6 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 468828288a4a..1b099c123670 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -164,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
 	si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
 	si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
+	si->ndonate_files = sbi->ndirty_inode[DONATE_INODE];
 	si->nquota_files = sbi->nquota_files;
 	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
 	si->aw_cnt = atomic_read(&sbi->atomic_files);
@@ -501,6 +502,8 @@ static int stat_show(struct seq_file *s, void *v)
 			   si->compr_inode, si->compr_blocks);
 		seq_printf(s, "  - Swapfile Inode: %u\n",
 			   si->swapfile_inode);
+		seq_printf(s, "  - Donate Inode: %d\n",
+			   si->ndonate_files);
 		seq_printf(s, "  - Orphan/Append/Update Inode: %u, %u, %u\n",
 			   si->orphans, si->append, si->update);
 		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4bfe162eefd3..7ce3e3eab17a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -850,6 +850,11 @@ struct f2fs_inode_info {
 #endif
 	struct list_head dirty_list;	/* dirty list for dirs and files */
 	struct list_head gdirty_list;	/* linked in global dirty list */
+
+	/* linked in global inode list for cache donation */
+	struct list_head gdonate_list;
+	loff_t donate_start, donate_end; /* inclusive */
+
 	struct task_struct *atomic_write_task;	/* store atomic write task */
 	struct extent_tree *extent_tree[NR_EXTENT_CACHES];
 					/* cached extent_tree entry */
@@ -1274,6 +1279,7 @@ enum inode_type {
 	DIR_INODE,			/* for dirty dir inode */
 	FILE_INODE,			/* for dirty regular/symlink inode */
 	DIRTY_META,			/* for all dirtied inode metadata */
+	DONATE_INODE,			/* for all inode to donate pages */
 	NR_INODE_TYPE,
 };
 
@@ -3984,7 +3990,8 @@ struct f2fs_stat_info {
 	unsigned long long allocated_data_blocks;
 	int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
 	int ndirty_data, ndirty_qdata;
-	unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
+	unsigned int ndirty_dirs, ndirty_files, ndirty_all;
+	unsigned int nquota_files, ndonate_files;
 	int nats, dirty_nats, sits, dirty_sits;
 	int free_nids, avail_nids, alloc_nids;
 	int total_count, utilization;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 9980d17ef9f5..d6dea6258c2d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2493,6 +2493,51 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
 	return ret;
 }
 
+static int f2fs_ioc_donate_range(struct file *filp, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct mnt_idmap *idmap = file_mnt_idmap(filp);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct f2fs_donate_range range;
+	int ret;
+
+	if (copy_from_user(&range, (struct f2fs_donate_range __user *)arg,
+							sizeof(range)))
+		return -EFAULT;
+
+	if (!inode_owner_or_capable(idmap, inode))
+		return -EACCES;
+
+	if (!S_ISREG(inode->i_mode))
+		return -EINVAL;
+
+	ret = mnt_want_write_file(filp);
+	if (ret)
+		return ret;
+
+	inode_lock(inode);
+
+	if (f2fs_is_atomic_file(inode))
+		goto out;
+
+	spin_lock(&sbi->inode_lock[DONATE_INODE]);
+	if (list_empty(&F2FS_I(inode)->gdonate_list)) {
+		list_add_tail(&F2FS_I(inode)->gdonate_list,
+				&sbi->inode_list[DONATE_INODE]);
+		stat_inc_dirty_inode(sbi, DONATE_INODE);
+	} else {
+		list_move_tail(&F2FS_I(inode)->gdonate_list,
+				&sbi->inode_list[DONATE_INODE]);
+	}
+	F2FS_I(inode)->donate_start = range.start;
+	F2FS_I(inode)->donate_end = range.start + range.len - 1;
+	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+out:
+	inode_unlock(inode);
+	mnt_drop_write_file(filp);
+	return ret;
+}
+
 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
@@ -4522,6 +4567,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return -EOPNOTSUPP;
 	case F2FS_IOC_SHUTDOWN:
 		return f2fs_ioc_shutdown(filp, arg);
+	case F2FS_IOC_DONATE_RANGE:
+		return f2fs_ioc_donate_range(filp, arg);
 	case FITRIM:
 		return f2fs_ioc_fitrim(filp, arg);
 	case FS_IOC_SET_ENCRYPTION_POLICY:
@@ -5273,6 +5320,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
 	case F2FS_IOC_ABORT_ATOMIC_WRITE:
 	case F2FS_IOC_SHUTDOWN:
+	case F2FS_IOC_DONATE_RANGE:
 	case FITRIM:
 	case FS_IOC_SET_ENCRYPTION_POLICY:
 	case FS_IOC_GET_ENCRYPTION_PWSALT:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 7de33da8b3ea..e38dc5fe2f2e 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -804,6 +804,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return 0;
 }
 
+static void f2fs_remove_donate_inode(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+	if (list_empty(&F2FS_I(inode)->gdonate_list))
+		return;
+
+	spin_lock(&sbi->inode_lock[DONATE_INODE]);
+	list_del_init(&F2FS_I(inode)->gdonate_list);
+	stat_dec_dirty_inode(sbi, DONATE_INODE);
+	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+}
+
 /*
  * Called at the last iput() if i_nlink is zero
  */
@@ -838,6 +851,7 @@ void f2fs_evict_inode(struct inode *inode)
 
 	f2fs_bug_on(sbi, get_dirty_pages(inode));
 	f2fs_remove_dirty_inode(inode);
+	f2fs_remove_donate_inode(inode);
 
 	if (!IS_DEVICE_ALIASING(inode))
 		f2fs_destroy_extent_tree(inode);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index fc7d463dee15..ef639a6d82e5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1441,6 +1441,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 	spin_lock_init(&fi->i_size_lock);
 	INIT_LIST_HEAD(&fi->dirty_list);
 	INIT_LIST_HEAD(&fi->gdirty_list);
+	INIT_LIST_HEAD(&fi->gdonate_list);
 	init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
 	init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
 	init_f2fs_rwsem(&fi->i_xattr_sem);
diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
index f7aaf8d23e20..cd38a7c166e6 100644
--- a/include/uapi/linux/f2fs.h
+++ b/include/uapi/linux/f2fs.h
@@ -44,6 +44,8 @@
 #define F2FS_IOC_COMPRESS_FILE		_IO(F2FS_IOCTL_MAGIC, 24)
 #define F2FS_IOC_START_ATOMIC_REPLACE	_IO(F2FS_IOCTL_MAGIC, 25)
 #define F2FS_IOC_GET_DEV_ALIAS_FILE	_IOR(F2FS_IOCTL_MAGIC, 26, __u32)
+#define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
+						struct f2fs_donate_range)
 
 /*
  * should be same as XFS_IOC_GOINGDOWN.
@@ -97,4 +99,9 @@ struct f2fs_comp_option {
 	__u8 log_cluster_size;
 };
 
+struct f2fs_donate_range {
+	__u64 start;
+	__u64 len;
+};
+
 #endif /* _UAPI_LINUX_F2FS_H */
-- 
2.47.1.688.g23fc6f90ad-goog


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages
  2025-01-13 18:39 [PATCH 1/2] f2fs: register inodes which is able to donate pages Jaegeuk Kim
@ 2025-01-13 18:39 ` Jaegeuk Kim
  2025-01-14  7:34   ` [f2fs-dev] " Chao Yu
  2025-01-14 20:50   ` [PATCH 2/2 v2] " Jaegeuk Kim
  2025-01-14  6:34 ` [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to donate pages Chao Yu
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 12+ messages in thread
From: Jaegeuk Kim @ 2025-01-13 18:39 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel; +Cc: Jaegeuk Kim

1. ioctl(fd1, F2FS_IOC_DONATE_RANGE, {0,3});
2. ioctl(fd2, F2FS_IOC_DONATE_RANGE, {1,2});
3. ioctl(fd3, F2FS_IOC_DONATE_RANGE, {3,1});
4. echo 3 > /sys/fs/f2fs/blk/donate_caches

will reclaim 3 page cache ranges, registered by #1, #2, and #3.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  7 +++++++
 fs/f2fs/f2fs.h                          |  4 ++++
 fs/f2fs/shrinker.c                      | 27 +++++++++++++++++++++++++
 fs/f2fs/sysfs.c                         |  8 ++++++++
 4 files changed, 46 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 3e1630c70d8a..6f9d8b8889fd 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -828,3 +828,10 @@ Date:		November 2024
 Contact:	"Chao Yu" <chao@kernel.org>
 Description:	It controls max read extent count for per-inode, the value of threshold
 		is 10240 by default.
+
+What:		/sys/fs/f2fs/<disk>/donate_caches
+Date:		December 2024
+Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:	It reclaims certain file-backed pages registered by
+		ioctl(F2FS_IOC_DONATE_RANGE).
+		For example, writing N tries to drop N address spaces in LRU.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7ce3e3eab17a..6c434ae94cb1 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1635,6 +1635,9 @@ struct f2fs_sb_info {
 	unsigned int warm_data_age_threshold;
 	unsigned int last_age_weight;
 
+	/* control donate caches */
+	unsigned int donate_caches;
+
 	/* basic filesystem units */
 	unsigned int log_sectors_per_block;	/* log2 sectors per block */
 	unsigned int log_blocksize;		/* log2 block size */
@@ -4256,6 +4259,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
 			struct shrink_control *sc);
 unsigned long f2fs_shrink_scan(struct shrinker *shrink,
 			struct shrink_control *sc);
+void f2fs_donate_caches(struct f2fs_sb_info *sbi);
 void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
 void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
 
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 83d6fb97dcae..a3e2063392a7 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -130,6 +130,33 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
 	return freed;
 }
 
+void f2fs_donate_caches(struct f2fs_sb_info *sbi)
+{
+	struct inode *inode = NULL;
+	struct f2fs_inode_info *fi;
+	int nfiles = sbi->donate_caches;
+next:
+	spin_lock(&sbi->inode_lock[DONATE_INODE]);
+	if (list_empty(&sbi->inode_list[DONATE_INODE]) || !nfiles) {
+		spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+		return;
+	}
+
+	fi = list_first_entry(&sbi->inode_list[DONATE_INODE],
+				struct f2fs_inode_info, gdonate_list);
+	list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]);
+	inode = igrab(&fi->vfs_inode);
+	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+
+	if (inode) {
+		invalidate_inode_pages2_range(inode->i_mapping,
+			fi->donate_start, fi->donate_end);
+		iput(inode);
+	}
+	if (nfiles--)
+		goto next;
+}
+
 void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
 {
 	spin_lock(&f2fs_list_lock);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 6b99dc49f776..7570580ec3c0 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -811,6 +811,12 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
 		return count;
 	}
 
+	if (!strcmp(a->attr.name, "donate_caches")) {
+		sbi->donate_caches = min(t, sbi->ndirty_inode[DONATE_INODE]);
+		f2fs_donate_caches(sbi);
+		return count;
+	}
+
 	*ui = (unsigned int)t;
 
 	return count;
@@ -1030,6 +1036,7 @@ F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
 F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
 F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity);
 F2FS_SBI_GENERAL_RW_ATTR(dir_level);
+F2FS_SBI_GENERAL_RW_ATTR(donate_caches);
 #ifdef CONFIG_F2FS_IOSTAT
 F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
 F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
@@ -1178,6 +1185,7 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(migration_granularity),
 	ATTR_LIST(migration_window_granularity),
 	ATTR_LIST(dir_level),
+	ATTR_LIST(donate_caches),
 	ATTR_LIST(ram_thresh),
 	ATTR_LIST(ra_nid_pages),
 	ATTR_LIST(dirty_nats_ratio),
-- 
2.47.1.688.g23fc6f90ad-goog


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to donate pages
  2025-01-13 18:39 [PATCH 1/2] f2fs: register inodes which is able to donate pages Jaegeuk Kim
  2025-01-13 18:39 ` [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages Jaegeuk Kim
@ 2025-01-14  6:34 ` Chao Yu
  2025-01-14 17:15   ` Jaegeuk Kim
  2025-01-14 17:20 ` [PATCH 1/2 v2] " Jaegeuk Kim
  2025-01-14 21:16 ` [PATCH 1/2] " Eric Biggers
  3 siblings, 1 reply; 12+ messages in thread
From: Chao Yu @ 2025-01-14  6:34 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel; +Cc: chao

On 1/14/25 02:39, Jaegeuk Kim via Linux-f2fs-devel wrote:
> This patch introduces an inode list to keep the page cache ranges that users
> can donate pages together.
> 
>   #define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
> 						struct f2fs_donate_range)
>   struct f2fs_donate_range {
> 	__u64 start;
> 	__u64 len;
>   };
> 
> e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);

I guess we need to add documentation for all ioctls including this one, maybe
later? :)

> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>   fs/f2fs/debug.c           |  3 +++
>   fs/f2fs/f2fs.h            |  9 +++++++-
>   fs/f2fs/file.c            | 48 +++++++++++++++++++++++++++++++++++++++
>   fs/f2fs/inode.c           | 14 ++++++++++++
>   fs/f2fs/super.c           |  1 +
>   include/uapi/linux/f2fs.h |  7 ++++++
>   6 files changed, 81 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index 468828288a4a..1b099c123670 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -164,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>   	si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
>   	si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
>   	si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
> +	si->ndonate_files = sbi->ndirty_inode[DONATE_INODE];
>   	si->nquota_files = sbi->nquota_files;
>   	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
>   	si->aw_cnt = atomic_read(&sbi->atomic_files);
> @@ -501,6 +502,8 @@ static int stat_show(struct seq_file *s, void *v)
>   			   si->compr_inode, si->compr_blocks);
>   		seq_printf(s, "  - Swapfile Inode: %u\n",
>   			   si->swapfile_inode);
> +		seq_printf(s, "  - Donate Inode: %d\n",

Please use %u instead of %d, since si->ndonate_files is an unsigned int.

> +			   si->ndonate_files);
>   		seq_printf(s, "  - Orphan/Append/Update Inode: %u, %u, %u\n",
>   			   si->orphans, si->append, si->update);
>   		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 4bfe162eefd3..7ce3e3eab17a 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -850,6 +850,11 @@ struct f2fs_inode_info {
>   #endif
>   	struct list_head dirty_list;	/* dirty list for dirs and files */
>   	struct list_head gdirty_list;	/* linked in global dirty list */
> +
> +	/* linked in global inode list for cache donation */
> +	struct list_head gdonate_list;
> +	loff_t donate_start, donate_end; /* inclusive */
> +
>   	struct task_struct *atomic_write_task;	/* store atomic write task */
>   	struct extent_tree *extent_tree[NR_EXTENT_CACHES];
>   					/* cached extent_tree entry */
> @@ -1274,6 +1279,7 @@ enum inode_type {
>   	DIR_INODE,			/* for dirty dir inode */
>   	FILE_INODE,			/* for dirty regular/symlink inode */
>   	DIRTY_META,			/* for all dirtied inode metadata */
> +	DONATE_INODE,			/* for all inode to donate pages */
>   	NR_INODE_TYPE,
>   };
>   
> @@ -3984,7 +3990,8 @@ struct f2fs_stat_info {
>   	unsigned long long allocated_data_blocks;
>   	int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
>   	int ndirty_data, ndirty_qdata;
> -	unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
> +	unsigned int ndirty_dirs, ndirty_files, ndirty_all;
> +	unsigned int nquota_files, ndonate_files;
>   	int nats, dirty_nats, sits, dirty_sits;
>   	int free_nids, avail_nids, alloc_nids;
>   	int total_count, utilization;
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 9980d17ef9f5..d6dea6258c2d 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -2493,6 +2493,51 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>   	return ret;
>   }
>   
> +static int f2fs_ioc_donate_range(struct file *filp, unsigned long arg)
> +{
> +	struct inode *inode = file_inode(filp);
> +	struct mnt_idmap *idmap = file_mnt_idmap(filp);
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +	struct f2fs_donate_range range;
> +	int ret;
> +
> +	if (copy_from_user(&range, (struct f2fs_donate_range __user *)arg,
> +							sizeof(range)))
> +		return -EFAULT;

What about doing a sanity check on the donate range here, in order to avoid
overflow during the fi->donate_end calculation?

F2FS_I(inode)->donate_end = range.start + range.len - 1;

> +
> +	if (!inode_owner_or_capable(idmap, inode))
> +		return -EACCES;
> +
> +	if (!S_ISREG(inode->i_mode))
> +		return -EINVAL;
> +
> +	ret = mnt_want_write_file(filp);
> +	if (ret)
> +		return ret;
> +
> +	inode_lock(inode);
> +
> +	if (f2fs_is_atomic_file(inode))
> +		goto out;
> +
> +	spin_lock(&sbi->inode_lock[DONATE_INODE]);
> +	if (list_empty(&F2FS_I(inode)->gdonate_list)) {
> +		list_add_tail(&F2FS_I(inode)->gdonate_list,
> +				&sbi->inode_list[DONATE_INODE]);
> +		stat_inc_dirty_inode(sbi, DONATE_INODE);
> +	} else {
> +		list_move_tail(&F2FS_I(inode)->gdonate_list,
> +				&sbi->inode_list[DONATE_INODE]);
> +	}
> +	F2FS_I(inode)->donate_start = range.start;
> +	F2FS_I(inode)->donate_end = range.start + range.len - 1;
> +	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> +out:
> +	inode_unlock(inode);
> +	mnt_drop_write_file(filp);
> +	return ret;
> +}
> +
>   static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
>   {
>   	struct inode *inode = file_inode(filp);
> @@ -4522,6 +4567,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>   		return -EOPNOTSUPP;
>   	case F2FS_IOC_SHUTDOWN:
>   		return f2fs_ioc_shutdown(filp, arg);
> +	case F2FS_IOC_DONATE_RANGE:
> +		return f2fs_ioc_donate_range(filp, arg);
>   	case FITRIM:
>   		return f2fs_ioc_fitrim(filp, arg);
>   	case FS_IOC_SET_ENCRYPTION_POLICY:
> @@ -5273,6 +5320,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>   	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
>   	case F2FS_IOC_ABORT_ATOMIC_WRITE:
>   	case F2FS_IOC_SHUTDOWN:
> +	case F2FS_IOC_DONATE_RANGE:
>   	case FITRIM:
>   	case FS_IOC_SET_ENCRYPTION_POLICY:
>   	case FS_IOC_GET_ENCRYPTION_PWSALT:
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index 7de33da8b3ea..e38dc5fe2f2e 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -804,6 +804,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
>   	return 0;
>   }
>   
> +static void f2fs_remove_donate_inode(struct inode *inode)
> +{
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +
> +	if (list_empty(&F2FS_I(inode)->gdonate_list))

Would it be safer to access gdonate_list while holding inode_lock[DONATE_INODE]?

Thanks,

> +		return;
> +
> +	spin_lock(&sbi->inode_lock[DONATE_INODE]);
> +	list_del_init(&F2FS_I(inode)->gdonate_list);
> +	stat_dec_dirty_inode(sbi, DONATE_INODE);
> +	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> +}
> +
>   /*
>    * Called at the last iput() if i_nlink is zero
>    */
> @@ -838,6 +851,7 @@ void f2fs_evict_inode(struct inode *inode)
>   
>   	f2fs_bug_on(sbi, get_dirty_pages(inode));
>   	f2fs_remove_dirty_inode(inode);
> +	f2fs_remove_donate_inode(inode);
>   
>   	if (!IS_DEVICE_ALIASING(inode))
>   		f2fs_destroy_extent_tree(inode);
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index fc7d463dee15..ef639a6d82e5 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1441,6 +1441,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
>   	spin_lock_init(&fi->i_size_lock);
>   	INIT_LIST_HEAD(&fi->dirty_list);
>   	INIT_LIST_HEAD(&fi->gdirty_list);
> +	INIT_LIST_HEAD(&fi->gdonate_list);
>   	init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
>   	init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
>   	init_f2fs_rwsem(&fi->i_xattr_sem);
> diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
> index f7aaf8d23e20..cd38a7c166e6 100644
> --- a/include/uapi/linux/f2fs.h
> +++ b/include/uapi/linux/f2fs.h
> @@ -44,6 +44,8 @@
>   #define F2FS_IOC_COMPRESS_FILE		_IO(F2FS_IOCTL_MAGIC, 24)
>   #define F2FS_IOC_START_ATOMIC_REPLACE	_IO(F2FS_IOCTL_MAGIC, 25)
>   #define F2FS_IOC_GET_DEV_ALIAS_FILE	_IOR(F2FS_IOCTL_MAGIC, 26, __u32)
> +#define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
> +						struct f2fs_donate_range)
>   
>   /*
>    * should be same as XFS_IOC_GOINGDOWN.
> @@ -97,4 +99,9 @@ struct f2fs_comp_option {
>   	__u8 log_cluster_size;
>   };
>   
> +struct f2fs_donate_range {
> +	__u64 start;
> +	__u64 len;
> +};
> +
>   #endif /* _UAPI_LINUX_F2FS_H */


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [f2fs-dev] [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages
  2025-01-13 18:39 ` [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages Jaegeuk Kim
@ 2025-01-14  7:34   ` Chao Yu
  2025-01-14 17:18     ` Jaegeuk Kim
  2025-01-14 20:50   ` [PATCH 2/2 v2] " Jaegeuk Kim
  1 sibling, 1 reply; 12+ messages in thread
From: Chao Yu @ 2025-01-14  7:34 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel; +Cc: chao

On 1/14/25 02:39, Jaegeuk Kim via Linux-f2fs-devel wrote:
> 1. ioctl(fd1, F2FS_IOC_DONATE_RANGE, {0,3});
> 2. ioctl(fd2, F2FS_IOC_DONATE_RANGE, {1,2});
> 3. ioctl(fd3, F2FS_IOC_DONATE_RANGE, {3,1});
> 4. echo 3 > /sys/fs/f2fs/blk/donate_caches
> 
> will reclaim 3 page cache ranges, registered by #1, #2, and #3.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>   Documentation/ABI/testing/sysfs-fs-f2fs |  7 +++++++
>   fs/f2fs/f2fs.h                          |  4 ++++
>   fs/f2fs/shrinker.c                      | 27 +++++++++++++++++++++++++
>   fs/f2fs/sysfs.c                         |  8 ++++++++
>   4 files changed, 46 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> index 3e1630c70d8a..6f9d8b8889fd 100644
> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> @@ -828,3 +828,10 @@ Date:		November 2024
>   Contact:	"Chao Yu" <chao@kernel.org>
>   Description:	It controls max read extent count for per-inode, the value of threshold
>   		is 10240 by default.
> +
> +What:		/sys/fs/f2fs/<disk>/donate_caches
> +Date:		December 2024
> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> +Description:	It reclaims certain file-backed pages registered by
> +		ioctl(F2FS_IOC_DONATE_RANGE).
> +		For example, writing N tries to drop N address spaces in LRU.
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 7ce3e3eab17a..6c434ae94cb1 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1635,6 +1635,9 @@ struct f2fs_sb_info {
>   	unsigned int warm_data_age_threshold;
>   	unsigned int last_age_weight;
>   
> +	/* control donate caches */
> +	unsigned int donate_caches;
> +
>   	/* basic filesystem units */
>   	unsigned int log_sectors_per_block;	/* log2 sectors per block */
>   	unsigned int log_blocksize;		/* log2 block size */
> @@ -4256,6 +4259,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
>   			struct shrink_control *sc);
>   unsigned long f2fs_shrink_scan(struct shrinker *shrink,
>   			struct shrink_control *sc);
> +void f2fs_donate_caches(struct f2fs_sb_info *sbi);
>   void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
>   void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
>   
> diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
> index 83d6fb97dcae..a3e2063392a7 100644
> --- a/fs/f2fs/shrinker.c
> +++ b/fs/f2fs/shrinker.c
> @@ -130,6 +130,33 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
>   	return freed;
>   }
>   
> +void f2fs_donate_caches(struct f2fs_sb_info *sbi)
> +{
> +	struct inode *inode = NULL;
> +	struct f2fs_inode_info *fi;
> +	int nfiles = sbi->donate_caches;
> +next:
> +	spin_lock(&sbi->inode_lock[DONATE_INODE]);
> +	if (list_empty(&sbi->inode_list[DONATE_INODE]) || !nfiles) {
> +		spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> +		return;
> +	}
> +
> +	fi = list_first_entry(&sbi->inode_list[DONATE_INODE],
> +				struct f2fs_inode_info, gdonate_list);
> +	list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]);

There is no need to drop it from the global list, right?

Thanks,

> +	inode = igrab(&fi->vfs_inode);
> +	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> +
> +	if (inode) {
> +		invalidate_inode_pages2_range(inode->i_mapping,
> +			fi->donate_start, fi->donate_end);
> +		iput(inode);
> +	}
> +	if (nfiles--)
> +		goto next;
> +}
> +
>   void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
>   {
>   	spin_lock(&f2fs_list_lock);
> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> index 6b99dc49f776..7570580ec3c0 100644
> --- a/fs/f2fs/sysfs.c
> +++ b/fs/f2fs/sysfs.c
> @@ -811,6 +811,12 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
>   		return count;
>   	}
>   
> +	if (!strcmp(a->attr.name, "donate_caches")) {
> +		sbi->donate_caches = min(t, sbi->ndirty_inode[DONATE_INODE]);
> +		f2fs_donate_caches(sbi);
> +		return count;
> +	}
> +
>   	*ui = (unsigned int)t;
>   
>   	return count;
> @@ -1030,6 +1036,7 @@ F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
>   F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
>   F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity);
>   F2FS_SBI_GENERAL_RW_ATTR(dir_level);
> +F2FS_SBI_GENERAL_RW_ATTR(donate_caches);
>   #ifdef CONFIG_F2FS_IOSTAT
>   F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
>   F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
> @@ -1178,6 +1185,7 @@ static struct attribute *f2fs_attrs[] = {
>   	ATTR_LIST(migration_granularity),
>   	ATTR_LIST(migration_window_granularity),
>   	ATTR_LIST(dir_level),
> +	ATTR_LIST(donate_caches),
>   	ATTR_LIST(ram_thresh),
>   	ATTR_LIST(ra_nid_pages),
>   	ATTR_LIST(dirty_nats_ratio),


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to donate pages
  2025-01-14  6:34 ` [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to donate pages Chao Yu
@ 2025-01-14 17:15   ` Jaegeuk Kim
  2025-01-15  2:12     ` Chao Yu
  0 siblings, 1 reply; 12+ messages in thread
From: Jaegeuk Kim @ 2025-01-14 17:15 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 01/14, Chao Yu wrote:
> On 1/14/25 02:39, Jaegeuk Kim via Linux-f2fs-devel wrote:
> > This patch introduces an inode list to keep the page cache ranges that users
> > can donate pages together.
> > 
> >   #define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
> > 						struct f2fs_donate_range)
> >   struct f2fs_donate_range {
> > 	__u64 start;
> > 	__u64 len;
> >   };
> > 
> > e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);
> 
> I guess we need to add documentation for all ioctls including this one, maybe
> later? :)

Yeah, later.

> 
> > 
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >   fs/f2fs/debug.c           |  3 +++
> >   fs/f2fs/f2fs.h            |  9 +++++++-
> >   fs/f2fs/file.c            | 48 +++++++++++++++++++++++++++++++++++++++
> >   fs/f2fs/inode.c           | 14 ++++++++++++
> >   fs/f2fs/super.c           |  1 +
> >   include/uapi/linux/f2fs.h |  7 ++++++
> >   6 files changed, 81 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> > index 468828288a4a..1b099c123670 100644
> > --- a/fs/f2fs/debug.c
> > +++ b/fs/f2fs/debug.c
> > @@ -164,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
> >   	si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
> >   	si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
> >   	si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
> > +	si->ndonate_files = sbi->ndirty_inode[DONATE_INODE];
> >   	si->nquota_files = sbi->nquota_files;
> >   	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
> >   	si->aw_cnt = atomic_read(&sbi->atomic_files);
> > @@ -501,6 +502,8 @@ static int stat_show(struct seq_file *s, void *v)
> >   			   si->compr_inode, si->compr_blocks);
> >   		seq_printf(s, "  - Swapfile Inode: %u\n",
> >   			   si->swapfile_inode);
> > +		seq_printf(s, "  - Donate Inode: %d\n",
> 
> %u instead of %d due to si->ndonate_files is type of unsigned int.
> 
> > +			   si->ndonate_files);
> >   		seq_printf(s, "  - Orphan/Append/Update Inode: %u, %u, %u\n",
> >   			   si->orphans, si->append, si->update);
> >   		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 4bfe162eefd3..7ce3e3eab17a 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -850,6 +850,11 @@ struct f2fs_inode_info {
> >   #endif
> >   	struct list_head dirty_list;	/* dirty list for dirs and files */
> >   	struct list_head gdirty_list;	/* linked in global dirty list */
> > +
> > +	/* linked in global inode list for cache donation */
> > +	struct list_head gdonate_list;
> > +	loff_t donate_start, donate_end; /* inclusive */
> > +
> >   	struct task_struct *atomic_write_task;	/* store atomic write task */
> >   	struct extent_tree *extent_tree[NR_EXTENT_CACHES];
> >   					/* cached extent_tree entry */
> > @@ -1274,6 +1279,7 @@ enum inode_type {
> >   	DIR_INODE,			/* for dirty dir inode */
> >   	FILE_INODE,			/* for dirty regular/symlink inode */
> >   	DIRTY_META,			/* for all dirtied inode metadata */
> > +	DONATE_INODE,			/* for all inode to donate pages */
> >   	NR_INODE_TYPE,
> >   };
> > @@ -3984,7 +3990,8 @@ struct f2fs_stat_info {
> >   	unsigned long long allocated_data_blocks;
> >   	int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
> >   	int ndirty_data, ndirty_qdata;
> > -	unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
> > +	unsigned int ndirty_dirs, ndirty_files, ndirty_all;
> > +	unsigned int nquota_files, ndonate_files;
> >   	int nats, dirty_nats, sits, dirty_sits;
> >   	int free_nids, avail_nids, alloc_nids;
> >   	int total_count, utilization;
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 9980d17ef9f5..d6dea6258c2d 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -2493,6 +2493,51 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> >   	return ret;
> >   }
> > +static int f2fs_ioc_donate_range(struct file *filp, unsigned long arg)
> > +{
> > +	struct inode *inode = file_inode(filp);
> > +	struct mnt_idmap *idmap = file_mnt_idmap(filp);
> > +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > +	struct f2fs_donate_range range;
> > +	int ret;
> > +
> > +	if (copy_from_user(&range, (struct f2fs_donate_range __user *)arg,
> > +							sizeof(range)))
> > +		return -EFAULT;
> 
> What about doing sanity check on donate range here? in order to avoid overflow
> during fi->donate_end calculation.
> 
> F2FS_I(inode)->donate_end = range.start + range.len - 1;
> 
> > +
> > +	if (!inode_owner_or_capable(idmap, inode))
> > +		return -EACCES;
> > +
> > +	if (!S_ISREG(inode->i_mode))
> > +		return -EINVAL;
> > +
> > +	ret = mnt_want_write_file(filp);
> > +	if (ret)
> > +		return ret;
> > +
> > +	inode_lock(inode);
> > +
> > +	if (f2fs_is_atomic_file(inode))
> > +		goto out;
> > +
> > +	spin_lock(&sbi->inode_lock[DONATE_INODE]);
> > +	if (list_empty(&F2FS_I(inode)->gdonate_list)) {
> > +		list_add_tail(&F2FS_I(inode)->gdonate_list,
> > +				&sbi->inode_list[DONATE_INODE]);
> > +		stat_inc_dirty_inode(sbi, DONATE_INODE);
> > +	} else {
> > +		list_move_tail(&F2FS_I(inode)->gdonate_list,
> > +				&sbi->inode_list[DONATE_INODE]);
> > +	}
> > +	F2FS_I(inode)->donate_start = range.start;
> > +	F2FS_I(inode)->donate_end = range.start + range.len - 1;
> > +	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> > +out:
> > +	inode_unlock(inode);
> > +	mnt_drop_write_file(filp);
> > +	return ret;
> > +}
> > +
> >   static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
> >   {
> >   	struct inode *inode = file_inode(filp);
> > @@ -4522,6 +4567,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> >   		return -EOPNOTSUPP;
> >   	case F2FS_IOC_SHUTDOWN:
> >   		return f2fs_ioc_shutdown(filp, arg);
> > +	case F2FS_IOC_DONATE_RANGE:
> > +		return f2fs_ioc_donate_range(filp, arg);
> >   	case FITRIM:
> >   		return f2fs_ioc_fitrim(filp, arg);
> >   	case FS_IOC_SET_ENCRYPTION_POLICY:
> > @@ -5273,6 +5320,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> >   	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
> >   	case F2FS_IOC_ABORT_ATOMIC_WRITE:
> >   	case F2FS_IOC_SHUTDOWN:
> > +	case F2FS_IOC_DONATE_RANGE:
> >   	case FITRIM:
> >   	case FS_IOC_SET_ENCRYPTION_POLICY:
> >   	case FS_IOC_GET_ENCRYPTION_PWSALT:
> > diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> > index 7de33da8b3ea..e38dc5fe2f2e 100644
> > --- a/fs/f2fs/inode.c
> > +++ b/fs/f2fs/inode.c
> > @@ -804,6 +804,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
> >   	return 0;
> >   }
> > +static void f2fs_remove_donate_inode(struct inode *inode)
> > +{
> > +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > +
> > +	if (list_empty(&F2FS_I(inode)->gdonate_list))
> 
> Would it be safer to access gdonate_list with inode_lock[DONATE_INODE] held?

It's unnecessary as this is called from evict_inode.

> 
> Thanks,
> 
> > +		return;
> > +
> > +	spin_lock(&sbi->inode_lock[DONATE_INODE]);
> > +	list_del_init(&F2FS_I(inode)->gdonate_list);
> > +	stat_dec_dirty_inode(sbi, DONATE_INODE);
> > +	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> > +}
> > +
> >   /*
> >    * Called at the last iput() if i_nlink is zero
> >    */
> > @@ -838,6 +851,7 @@ void f2fs_evict_inode(struct inode *inode)
> >   	f2fs_bug_on(sbi, get_dirty_pages(inode));
> >   	f2fs_remove_dirty_inode(inode);
> > +	f2fs_remove_donate_inode(inode);
> >   	if (!IS_DEVICE_ALIASING(inode))
> >   		f2fs_destroy_extent_tree(inode);
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index fc7d463dee15..ef639a6d82e5 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -1441,6 +1441,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
> >   	spin_lock_init(&fi->i_size_lock);
> >   	INIT_LIST_HEAD(&fi->dirty_list);
> >   	INIT_LIST_HEAD(&fi->gdirty_list);
> > +	INIT_LIST_HEAD(&fi->gdonate_list);
> >   	init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
> >   	init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
> >   	init_f2fs_rwsem(&fi->i_xattr_sem);
> > diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
> > index f7aaf8d23e20..cd38a7c166e6 100644
> > --- a/include/uapi/linux/f2fs.h
> > +++ b/include/uapi/linux/f2fs.h
> > @@ -44,6 +44,8 @@
> >   #define F2FS_IOC_COMPRESS_FILE		_IO(F2FS_IOCTL_MAGIC, 24)
> >   #define F2FS_IOC_START_ATOMIC_REPLACE	_IO(F2FS_IOCTL_MAGIC, 25)
> >   #define F2FS_IOC_GET_DEV_ALIAS_FILE	_IOR(F2FS_IOCTL_MAGIC, 26, __u32)
> > +#define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
> > +						struct f2fs_donate_range)
> >   /*
> >    * should be same as XFS_IOC_GOINGDOWN.
> > @@ -97,4 +99,9 @@ struct f2fs_comp_option {
> >   	__u8 log_cluster_size;
> >   };
> > +struct f2fs_donate_range {
> > +	__u64 start;
> > +	__u64 len;
> > +};
> > +
> >   #endif /* _UAPI_LINUX_F2FS_H */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [f2fs-dev] [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages
  2025-01-14  7:34   ` [f2fs-dev] " Chao Yu
@ 2025-01-14 17:18     ` Jaegeuk Kim
  2025-01-15  2:17       ` Chao Yu
  0 siblings, 1 reply; 12+ messages in thread
From: Jaegeuk Kim @ 2025-01-14 17:18 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 01/14, Chao Yu wrote:
> On 1/14/25 02:39, Jaegeuk Kim via Linux-f2fs-devel wrote:
> > 1. ioctl(fd1, F2FS_IOC_DONATE_RANGE, {0,3});
> > 2. ioctl(fd2, F2FS_IOC_DONATE_RANGE, {1,2});
> > 3. ioctl(fd3, F2FS_IOC_DONATE_RANGE, {3,1});
> > 4. echo 3 > /sys/fs/f2fs/blk/donate_caches
> > 
> > will reclaim 3 page cache ranges, registered by #1, #2, and #3.
> > 
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >   Documentation/ABI/testing/sysfs-fs-f2fs |  7 +++++++
> >   fs/f2fs/f2fs.h                          |  4 ++++
> >   fs/f2fs/shrinker.c                      | 27 +++++++++++++++++++++++++
> >   fs/f2fs/sysfs.c                         |  8 ++++++++
> >   4 files changed, 46 insertions(+)
> > 
> > diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> > index 3e1630c70d8a..6f9d8b8889fd 100644
> > --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> > +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> > @@ -828,3 +828,10 @@ Date:		November 2024
> >   Contact:	"Chao Yu" <chao@kernel.org>
> >   Description:	It controls max read extent count for per-inode, the value of threshold
> >   		is 10240 by default.
> > +
> > +What:		/sys/fs/f2fs/<disk>/donate_caches
> > +Date:		December 2024
> > +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> > +Description:	It reclaims the certain file-backed pages registered by
> > +		ioctl(F2FS_IOC_DONATE_RANGE).
> > +		For example, writing N tries to drop N address spaces in LRU.
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 7ce3e3eab17a..6c434ae94cb1 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -1635,6 +1635,9 @@ struct f2fs_sb_info {
> >   	unsigned int warm_data_age_threshold;
> >   	unsigned int last_age_weight;
> > +	/* control donate caches */
> > +	unsigned int donate_caches;
> > +
> >   	/* basic filesystem units */
> >   	unsigned int log_sectors_per_block;	/* log2 sectors per block */
> >   	unsigned int log_blocksize;		/* log2 block size */
> > @@ -4256,6 +4259,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
> >   			struct shrink_control *sc);
> >   unsigned long f2fs_shrink_scan(struct shrinker *shrink,
> >   			struct shrink_control *sc);
> > +void f2fs_donate_caches(struct f2fs_sb_info *sbi);
> >   void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
> >   void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
> > diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
> > index 83d6fb97dcae..a3e2063392a7 100644
> > --- a/fs/f2fs/shrinker.c
> > +++ b/fs/f2fs/shrinker.c
> > @@ -130,6 +130,33 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
> >   	return freed;
> >   }
> > +void f2fs_donate_caches(struct f2fs_sb_info *sbi)
> > +{
> > +	struct inode *inode = NULL;
> > +	struct f2fs_inode_info *fi;
> > +	int nfiles = sbi->donate_caches;
> > +next:
> > +	spin_lock(&sbi->inode_lock[DONATE_INODE]);
> > +	if (list_empty(&sbi->inode_list[DONATE_INODE]) || !nfiles) {
> > +		spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> > +		return;
> > +	}
> > +
> > +	fi = list_first_entry(&sbi->inode_list[DONATE_INODE],
> > +				struct f2fs_inode_info, gdonate_list);
> > +	list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]);
> 
> There is no need to drop it from the global list, right?

Yeah, there are two paths to drop it: 1) waiting for evict_inode, 2) setting a
new range with len=0.

> 
> Thanks,
> 
> > +	inode = igrab(&fi->vfs_inode);
> > +	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> > +
> > +	if (inode) {
> > +		invalidate_inode_pages2_range(inode->i_mapping,
> > +			fi->donate_start, fi->donate_end);
> > +		iput(inode);
> > +	}
> > +	if (nfiles--)
> > +		goto next;
> > +}
> > +
> >   void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
> >   {
> >   	spin_lock(&f2fs_list_lock);
> > diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> > index 6b99dc49f776..7570580ec3c0 100644
> > --- a/fs/f2fs/sysfs.c
> > +++ b/fs/f2fs/sysfs.c
> > @@ -811,6 +811,12 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
> >   		return count;
> >   	}
> > +	if (!strcmp(a->attr.name, "donate_caches")) {
> > +		sbi->donate_caches = min(t, sbi->ndirty_inode[DONATE_INODE]);
> > +		f2fs_donate_caches(sbi);
> > +		return count;
> > +	}
> > +
> >   	*ui = (unsigned int)t;
> >   	return count;
> > @@ -1030,6 +1036,7 @@ F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
> >   F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
> >   F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity);
> >   F2FS_SBI_GENERAL_RW_ATTR(dir_level);
> > +F2FS_SBI_GENERAL_RW_ATTR(donate_caches);
> >   #ifdef CONFIG_F2FS_IOSTAT
> >   F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
> >   F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
> > @@ -1178,6 +1185,7 @@ static struct attribute *f2fs_attrs[] = {
> >   	ATTR_LIST(migration_granularity),
> >   	ATTR_LIST(migration_window_granularity),
> >   	ATTR_LIST(dir_level),
> > +	ATTR_LIST(donate_caches),
> >   	ATTR_LIST(ram_thresh),
> >   	ATTR_LIST(ra_nid_pages),
> >   	ATTR_LIST(dirty_nats_ratio),

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2 v2] f2fs: register inodes which is able to donate pages
  2025-01-13 18:39 [PATCH 1/2] f2fs: register inodes which is able to donate pages Jaegeuk Kim
  2025-01-13 18:39 ` [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages Jaegeuk Kim
  2025-01-14  6:34 ` [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to donate pages Chao Yu
@ 2025-01-14 17:20 ` Jaegeuk Kim
  2025-01-14 21:16 ` [PATCH 1/2] " Eric Biggers
  3 siblings, 0 replies; 12+ messages in thread
From: Jaegeuk Kim @ 2025-01-14 17:20 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel

This patch introduces an inode list that tracks the page cache ranges whose
pages users can donate together.

 #define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
						struct f2fs_donate_range)
 struct f2fs_donate_range {
	__u64 start;
	__u64 len;
 };

e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---

 Change log from v1:
  - change %u print format
  - check range boundary

 fs/f2fs/debug.c           |  3 +++
 fs/f2fs/f2fs.h            |  9 ++++++-
 fs/f2fs/file.c            | 52 +++++++++++++++++++++++++++++++++++++++
 fs/f2fs/inode.c           | 14 +++++++++++
 fs/f2fs/super.c           |  1 +
 include/uapi/linux/f2fs.h |  7 ++++++
 6 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 468828288a4a..f7aea4dc9565 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -164,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
 	si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
 	si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
+	si->ndonate_files = sbi->ndirty_inode[DONATE_INODE];
 	si->nquota_files = sbi->nquota_files;
 	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
 	si->aw_cnt = atomic_read(&sbi->atomic_files);
@@ -501,6 +502,8 @@ static int stat_show(struct seq_file *s, void *v)
 			   si->compr_inode, si->compr_blocks);
 		seq_printf(s, "  - Swapfile Inode: %u\n",
 			   si->swapfile_inode);
+		seq_printf(s, "  - Donate Inode: %u\n",
+			   si->ndonate_files);
 		seq_printf(s, "  - Orphan/Append/Update Inode: %u, %u, %u\n",
 			   si->orphans, si->append, si->update);
 		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4bfe162eefd3..7ce3e3eab17a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -850,6 +850,11 @@ struct f2fs_inode_info {
 #endif
 	struct list_head dirty_list;	/* dirty list for dirs and files */
 	struct list_head gdirty_list;	/* linked in global dirty list */
+
+	/* linked in global inode list for cache donation */
+	struct list_head gdonate_list;
+	loff_t donate_start, donate_end; /* inclusive */
+
 	struct task_struct *atomic_write_task;	/* store atomic write task */
 	struct extent_tree *extent_tree[NR_EXTENT_CACHES];
 					/* cached extent_tree entry */
@@ -1274,6 +1279,7 @@ enum inode_type {
 	DIR_INODE,			/* for dirty dir inode */
 	FILE_INODE,			/* for dirty regular/symlink inode */
 	DIRTY_META,			/* for all dirtied inode metadata */
+	DONATE_INODE,			/* for all inode to donate pages */
 	NR_INODE_TYPE,
 };
 
@@ -3984,7 +3990,8 @@ struct f2fs_stat_info {
 	unsigned long long allocated_data_blocks;
 	int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
 	int ndirty_data, ndirty_qdata;
-	unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
+	unsigned int ndirty_dirs, ndirty_files, ndirty_all;
+	unsigned int nquota_files, ndonate_files;
 	int nats, dirty_nats, sits, dirty_sits;
 	int free_nids, avail_nids, alloc_nids;
 	int total_count, utilization;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 9980d17ef9f5..eb44999bb079 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2493,6 +2493,55 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
 	return ret;
 }
 
+static int f2fs_ioc_donate_range(struct file *filp, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct mnt_idmap *idmap = file_mnt_idmap(filp);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct f2fs_donate_range range;
+	int ret;
+
+	if (copy_from_user(&range, (struct f2fs_donate_range __user *)arg,
+							sizeof(range)))
+		return -EFAULT;
+
+	if (!inode_owner_or_capable(idmap, inode))
+		return -EACCES;
+
+	if (!S_ISREG(inode->i_mode))
+		return -EINVAL;
+
+	if (unlikely((range.start + range.len) >> PAGE_SHIFT >
+					max_file_blocks(inode)))
+		return -EINVAL;
+
+	ret = mnt_want_write_file(filp);
+	if (ret)
+		return ret;
+
+	inode_lock(inode);
+
+	if (f2fs_is_atomic_file(inode))
+		goto out;
+
+	spin_lock(&sbi->inode_lock[DONATE_INODE]);
+	if (list_empty(&F2FS_I(inode)->gdonate_list)) {
+		list_add_tail(&F2FS_I(inode)->gdonate_list,
+				&sbi->inode_list[DONATE_INODE]);
+		stat_inc_dirty_inode(sbi, DONATE_INODE);
+	} else {
+		list_move_tail(&F2FS_I(inode)->gdonate_list,
+				&sbi->inode_list[DONATE_INODE]);
+	}
+	F2FS_I(inode)->donate_start = range.start;
+	F2FS_I(inode)->donate_end = range.start + range.len - 1;
+	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+out:
+	inode_unlock(inode);
+	mnt_drop_write_file(filp);
+	return ret;
+}
+
 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
@@ -4522,6 +4571,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return -EOPNOTSUPP;
 	case F2FS_IOC_SHUTDOWN:
 		return f2fs_ioc_shutdown(filp, arg);
+	case F2FS_IOC_DONATE_RANGE:
+		return f2fs_ioc_donate_range(filp, arg);
 	case FITRIM:
 		return f2fs_ioc_fitrim(filp, arg);
 	case FS_IOC_SET_ENCRYPTION_POLICY:
@@ -5273,6 +5324,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
 	case F2FS_IOC_ABORT_ATOMIC_WRITE:
 	case F2FS_IOC_SHUTDOWN:
+	case F2FS_IOC_DONATE_RANGE:
 	case FITRIM:
 	case FS_IOC_SET_ENCRYPTION_POLICY:
 	case FS_IOC_GET_ENCRYPTION_PWSALT:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 7de33da8b3ea..e38dc5fe2f2e 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -804,6 +804,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return 0;
 }
 
+static void f2fs_remove_donate_inode(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+	if (list_empty(&F2FS_I(inode)->gdonate_list))
+		return;
+
+	spin_lock(&sbi->inode_lock[DONATE_INODE]);
+	list_del_init(&F2FS_I(inode)->gdonate_list);
+	stat_dec_dirty_inode(sbi, DONATE_INODE);
+	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+}
+
 /*
  * Called at the last iput() if i_nlink is zero
  */
@@ -838,6 +851,7 @@ void f2fs_evict_inode(struct inode *inode)
 
 	f2fs_bug_on(sbi, get_dirty_pages(inode));
 	f2fs_remove_dirty_inode(inode);
+	f2fs_remove_donate_inode(inode);
 
 	if (!IS_DEVICE_ALIASING(inode))
 		f2fs_destroy_extent_tree(inode);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index fc7d463dee15..ef639a6d82e5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1441,6 +1441,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 	spin_lock_init(&fi->i_size_lock);
 	INIT_LIST_HEAD(&fi->dirty_list);
 	INIT_LIST_HEAD(&fi->gdirty_list);
+	INIT_LIST_HEAD(&fi->gdonate_list);
 	init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
 	init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
 	init_f2fs_rwsem(&fi->i_xattr_sem);
diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
index f7aaf8d23e20..cd38a7c166e6 100644
--- a/include/uapi/linux/f2fs.h
+++ b/include/uapi/linux/f2fs.h
@@ -44,6 +44,8 @@
 #define F2FS_IOC_COMPRESS_FILE		_IO(F2FS_IOCTL_MAGIC, 24)
 #define F2FS_IOC_START_ATOMIC_REPLACE	_IO(F2FS_IOCTL_MAGIC, 25)
 #define F2FS_IOC_GET_DEV_ALIAS_FILE	_IOR(F2FS_IOCTL_MAGIC, 26, __u32)
+#define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
+						struct f2fs_donate_range)
 
 /*
  * should be same as XFS_IOC_GOINGDOWN.
@@ -97,4 +99,9 @@ struct f2fs_comp_option {
 	__u8 log_cluster_size;
 };
 
+struct f2fs_donate_range {
+	__u64 start;
+	__u64 len;
+};
+
 #endif /* _UAPI_LINUX_F2FS_H */
-- 
2.47.1.688.g23fc6f90ad-goog


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2 v2] f2fs: add a sysfs entry to request donate file-backed pages
  2025-01-13 18:39 ` [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages Jaegeuk Kim
  2025-01-14  7:34   ` [f2fs-dev] " Chao Yu
@ 2025-01-14 20:50   ` Jaegeuk Kim
  1 sibling, 0 replies; 12+ messages in thread
From: Jaegeuk Kim @ 2025-01-14 20:50 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel

1. ioctl(fd1, F2FS_IOC_DONATE_RANGE, {0,3});
2. ioctl(fd2, F2FS_IOC_DONATE_RANGE, {1,2});
3. ioctl(fd3, F2FS_IOC_DONATE_RANGE, {3,1});
4. echo 3 > /sys/fs/f2fs/blk/donate_caches

will reclaim 3 page cache ranges, registered by #1, #2, and #3.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---

 Change log from v1:
  - don't use sbi->ndirty_inode which is not defined by default

 Documentation/ABI/testing/sysfs-fs-f2fs |  7 +++++++
 fs/f2fs/f2fs.h                          |  4 ++++
 fs/f2fs/shrinker.c                      | 27 +++++++++++++++++++++++++
 fs/f2fs/sysfs.c                         |  8 ++++++++
 4 files changed, 46 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 3e1630c70d8a..6f9d8b8889fd 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -828,3 +828,10 @@ Date:		November 2024
 Contact:	"Chao Yu" <chao@kernel.org>
 Description:	It controls max read extent count for per-inode, the value of threshold
 		is 10240 by default.
+
+What:		/sys/fs/f2fs/<disk>/donate_caches
+Date:		December 2024
+Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:	It reclaims the certain file-backed pages registered by
+		ioctl(F2FS_IOC_DONATE_RANGE).
+		For example, writing N tries to drop N address spaces in LRU.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7ce3e3eab17a..6c434ae94cb1 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1635,6 +1635,9 @@ struct f2fs_sb_info {
 	unsigned int warm_data_age_threshold;
 	unsigned int last_age_weight;
 
+	/* control donate caches */
+	unsigned int donate_caches;
+
 	/* basic filesystem units */
 	unsigned int log_sectors_per_block;	/* log2 sectors per block */
 	unsigned int log_blocksize;		/* log2 block size */
@@ -4256,6 +4259,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
 			struct shrink_control *sc);
 unsigned long f2fs_shrink_scan(struct shrinker *shrink,
 			struct shrink_control *sc);
+void f2fs_donate_caches(struct f2fs_sb_info *sbi);
 void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
 void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
 
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 83d6fb97dcae..a3e2063392a7 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -130,6 +130,33 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
 	return freed;
 }
 
+void f2fs_donate_caches(struct f2fs_sb_info *sbi)
+{
+	struct inode *inode = NULL;
+	struct f2fs_inode_info *fi;
+	int nfiles = sbi->donate_caches;
+next:
+	spin_lock(&sbi->inode_lock[DONATE_INODE]);
+	if (list_empty(&sbi->inode_list[DONATE_INODE]) || !nfiles) {
+		spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+		return;
+	}
+
+	fi = list_first_entry(&sbi->inode_list[DONATE_INODE],
+				struct f2fs_inode_info, gdonate_list);
+	list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]);
+	inode = igrab(&fi->vfs_inode);
+	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+
+	if (inode) {
+		invalidate_inode_pages2_range(inode->i_mapping,
+			fi->donate_start, fi->donate_end);
+		iput(inode);
+	}
+	if (nfiles--)
+		goto next;
+}
+
 void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
 {
 	spin_lock(&f2fs_list_lock);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 6b99dc49f776..2a6b01257ad8 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -811,6 +811,12 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
 		return count;
 	}
 
+	if (!strcmp(a->attr.name, "donate_caches")) {
+		sbi->donate_caches = t;
+		f2fs_donate_caches(sbi);
+		return count;
+	}
+
 	*ui = (unsigned int)t;
 
 	return count;
@@ -1030,6 +1036,7 @@ F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
 F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
 F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity);
 F2FS_SBI_GENERAL_RW_ATTR(dir_level);
+F2FS_SBI_GENERAL_RW_ATTR(donate_caches);
 #ifdef CONFIG_F2FS_IOSTAT
 F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
 F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
@@ -1178,6 +1185,7 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(migration_granularity),
 	ATTR_LIST(migration_window_granularity),
 	ATTR_LIST(dir_level),
+	ATTR_LIST(donate_caches),
 	ATTR_LIST(ram_thresh),
 	ATTR_LIST(ra_nid_pages),
 	ATTR_LIST(dirty_nats_ratio),
-- 
2.48.0.rc2.279.g1de40edade-goog


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] f2fs: register inodes which is able to donate pages
  2025-01-13 18:39 [PATCH 1/2] f2fs: register inodes which is able to donate pages Jaegeuk Kim
                   ` (2 preceding siblings ...)
  2025-01-14 17:20 ` [PATCH 1/2 v2] " Jaegeuk Kim
@ 2025-01-14 21:16 ` Eric Biggers
  3 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2025-01-14 21:16 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On Mon, Jan 13, 2025 at 06:39:32PM +0000, Jaegeuk Kim via Linux-f2fs-devel wrote:
> This patch introduces an inode list to keep the page cache ranges that users
> can donate pages together.
> 
>  #define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
> 						struct f2fs_donate_range)
>  struct f2fs_donate_range {
> 	__u64 start;
> 	__u64 len;
>  };
> 
> e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/debug.c           |  3 +++
>  fs/f2fs/f2fs.h            |  9 +++++++-
>  fs/f2fs/file.c            | 48 +++++++++++++++++++++++++++++++++++++++
>  fs/f2fs/inode.c           | 14 ++++++++++++
>  fs/f2fs/super.c           |  1 +
>  include/uapi/linux/f2fs.h |  7 ++++++
>  6 files changed, 81 insertions(+), 1 deletion(-)

Missing a rationale, documentation, tests, and fuzzing.

- Eric

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to donate pages
  2025-01-14 17:15   ` Jaegeuk Kim
@ 2025-01-15  2:12     ` Chao Yu
  0 siblings, 0 replies; 12+ messages in thread
From: Chao Yu @ 2025-01-15  2:12 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: chao, linux-kernel, linux-f2fs-devel

On 1/15/25 01:15, Jaegeuk Kim wrote:
> On 01/14, Chao Yu wrote:
>> On 1/14/25 02:39, Jaegeuk Kim via Linux-f2fs-devel wrote:
>>> This patch introduces an inode list to keep the page cache ranges that users
>>> can donate pages together.
>>>
>>>    #define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
>>> 						struct f2fs_donate_range)
>>>    struct f2fs_donate_range {
>>> 	__u64 start;
>>> 	__u64 len;
>>>    };
>>>
>>> e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);
>>
>> I guess we need to add documentation for all ioctls including this one, maybe
>> later? :)
> 
> Yeah, later.
> 
>>
>>>
>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>> ---
>>>    fs/f2fs/debug.c           |  3 +++
>>>    fs/f2fs/f2fs.h            |  9 +++++++-
>>>    fs/f2fs/file.c            | 48 +++++++++++++++++++++++++++++++++++++++
>>>    fs/f2fs/inode.c           | 14 ++++++++++++
>>>    fs/f2fs/super.c           |  1 +
>>>    include/uapi/linux/f2fs.h |  7 ++++++
>>>    6 files changed, 81 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>>> index 468828288a4a..1b099c123670 100644
>>> --- a/fs/f2fs/debug.c
>>> +++ b/fs/f2fs/debug.c
>>> @@ -164,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>>>    	si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
>>>    	si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
>>>    	si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
>>> +	si->ndonate_files = sbi->ndirty_inode[DONATE_INODE];
>>>    	si->nquota_files = sbi->nquota_files;
>>>    	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
>>>    	si->aw_cnt = atomic_read(&sbi->atomic_files);
>>> @@ -501,6 +502,8 @@ static int stat_show(struct seq_file *s, void *v)
>>>    			   si->compr_inode, si->compr_blocks);
>>>    		seq_printf(s, "  - Swapfile Inode: %u\n",
>>>    			   si->swapfile_inode);
>>> +		seq_printf(s, "  - Donate Inode: %d\n",
>>
>> %u instead of %d, since si->ndonate_files is of type unsigned int.
>>
>>> +			   si->ndonate_files);
>>>    		seq_printf(s, "  - Orphan/Append/Update Inode: %u, %u, %u\n",
>>>    			   si->orphans, si->append, si->update);
>>>    		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index 4bfe162eefd3..7ce3e3eab17a 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -850,6 +850,11 @@ struct f2fs_inode_info {
>>>    #endif
>>>    	struct list_head dirty_list;	/* dirty list for dirs and files */
>>>    	struct list_head gdirty_list;	/* linked in global dirty list */
>>> +
>>> +	/* linked in global inode list for cache donation */
>>> +	struct list_head gdonate_list;
>>> +	loff_t donate_start, donate_end; /* inclusive */
>>> +
>>>    	struct task_struct *atomic_write_task;	/* store atomic write task */
>>>    	struct extent_tree *extent_tree[NR_EXTENT_CACHES];
>>>    					/* cached extent_tree entry */
>>> @@ -1274,6 +1279,7 @@ enum inode_type {
>>>    	DIR_INODE,			/* for dirty dir inode */
>>>    	FILE_INODE,			/* for dirty regular/symlink inode */
>>>    	DIRTY_META,			/* for all dirtied inode metadata */
>>> +	DONATE_INODE,			/* for all inode to donate pages */
>>>    	NR_INODE_TYPE,
>>>    };
>>> @@ -3984,7 +3990,8 @@ struct f2fs_stat_info {
>>>    	unsigned long long allocated_data_blocks;
>>>    	int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
>>>    	int ndirty_data, ndirty_qdata;
>>> -	unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
>>> +	unsigned int ndirty_dirs, ndirty_files, ndirty_all;
>>> +	unsigned int nquota_files, ndonate_files;
>>>    	int nats, dirty_nats, sits, dirty_sits;
>>>    	int free_nids, avail_nids, alloc_nids;
>>>    	int total_count, utilization;
>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>> index 9980d17ef9f5..d6dea6258c2d 100644
>>> --- a/fs/f2fs/file.c
>>> +++ b/fs/f2fs/file.c
>>> @@ -2493,6 +2493,51 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>    	return ret;
>>>    }
>>> +static int f2fs_ioc_donate_range(struct file *filp, unsigned long arg)
>>> +{
>>> +	struct inode *inode = file_inode(filp);
>>> +	struct mnt_idmap *idmap = file_mnt_idmap(filp);
>>> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>> +	struct f2fs_donate_range range;
>>> +	int ret;
>>> +
>>> +	if (copy_from_user(&range, (struct f2fs_donate_range __user *)arg,
>>> +							sizeof(range)))
>>> +		return -EFAULT;
>>
>> What about doing a sanity check on the donate range here, in order to avoid
>> overflow during the fi->donate_end calculation?
>>
>> F2FS_I(inode)->donate_end = range.start + range.len - 1;
>>
>>> +
>>> +	if (!inode_owner_or_capable(idmap, inode))
>>> +		return -EACCES;
>>> +
>>> +	if (!S_ISREG(inode->i_mode))
>>> +		return -EINVAL;
>>> +
>>> +	ret = mnt_want_write_file(filp);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	inode_lock(inode);
>>> +
>>> +	if (f2fs_is_atomic_file(inode))
>>> +		goto out;
>>> +
>>> +	spin_lock(&sbi->inode_lock[DONATE_INODE]);
>>> +	if (list_empty(&F2FS_I(inode)->gdonate_list)) {
>>> +		list_add_tail(&F2FS_I(inode)->gdonate_list,
>>> +				&sbi->inode_list[DONATE_INODE]);
>>> +		stat_inc_dirty_inode(sbi, DONATE_INODE);
>>> +	} else {
>>> +		list_move_tail(&F2FS_I(inode)->gdonate_list,
>>> +				&sbi->inode_list[DONATE_INODE]);
>>> +	}
>>> +	F2FS_I(inode)->donate_start = range.start;
>>> +	F2FS_I(inode)->donate_end = range.start + range.len - 1;
>>> +	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
>>> +out:
>>> +	inode_unlock(inode);
>>> +	mnt_drop_write_file(filp);
>>> +	return ret;
>>> +}
>>> +
>>>    static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
>>>    {
>>>    	struct inode *inode = file_inode(filp);
>>> @@ -4522,6 +4567,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>>>    		return -EOPNOTSUPP;
>>>    	case F2FS_IOC_SHUTDOWN:
>>>    		return f2fs_ioc_shutdown(filp, arg);
>>> +	case F2FS_IOC_DONATE_RANGE:
>>> +		return f2fs_ioc_donate_range(filp, arg);
>>>    	case FITRIM:
>>>    		return f2fs_ioc_fitrim(filp, arg);
>>>    	case FS_IOC_SET_ENCRYPTION_POLICY:
>>> @@ -5273,6 +5320,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>>>    	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
>>>    	case F2FS_IOC_ABORT_ATOMIC_WRITE:
>>>    	case F2FS_IOC_SHUTDOWN:
>>> +	case F2FS_IOC_DONATE_RANGE:
>>>    	case FITRIM:
>>>    	case FS_IOC_SET_ENCRYPTION_POLICY:
>>>    	case FS_IOC_GET_ENCRYPTION_PWSALT:
>>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>>> index 7de33da8b3ea..e38dc5fe2f2e 100644
>>> --- a/fs/f2fs/inode.c
>>> +++ b/fs/f2fs/inode.c
>>> @@ -804,6 +804,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
>>>    	return 0;
>>>    }
>>> +static void f2fs_remove_donate_inode(struct inode *inode)
>>> +{
>>> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>> +
>>> +	if (list_empty(&F2FS_I(inode)->gdonate_list))
>>
>> Would it be safer to access gdonate_list with inode_lock[DONATE_INODE] held?
> 
> It's unnecessary as this is called from evict_inode.

I was just concerned about the case where fi->gdonate_list's prev and next
pointers can be updated in a race due to insertion or deletion of an adjacent entry.

I checked, and there is no risk now. :)

Thanks,

> 
>>
>> Thanks,
>>
>>> +		return;
>>> +
>>> +	spin_lock(&sbi->inode_lock[DONATE_INODE]);
>>> +	list_del_init(&F2FS_I(inode)->gdonate_list);
>>> +	stat_dec_dirty_inode(sbi, DONATE_INODE);
>>> +	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
>>> +}
>>> +
>>>    /*
>>>     * Called at the last iput() if i_nlink is zero
>>>     */
>>> @@ -838,6 +851,7 @@ void f2fs_evict_inode(struct inode *inode)
>>>    	f2fs_bug_on(sbi, get_dirty_pages(inode));
>>>    	f2fs_remove_dirty_inode(inode);
>>> +	f2fs_remove_donate_inode(inode);
>>>    	if (!IS_DEVICE_ALIASING(inode))
>>>    		f2fs_destroy_extent_tree(inode);
>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>> index fc7d463dee15..ef639a6d82e5 100644
>>> --- a/fs/f2fs/super.c
>>> +++ b/fs/f2fs/super.c
>>> @@ -1441,6 +1441,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
>>>    	spin_lock_init(&fi->i_size_lock);
>>>    	INIT_LIST_HEAD(&fi->dirty_list);
>>>    	INIT_LIST_HEAD(&fi->gdirty_list);
>>> +	INIT_LIST_HEAD(&fi->gdonate_list);
>>>    	init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
>>>    	init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
>>>    	init_f2fs_rwsem(&fi->i_xattr_sem);
>>> diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
>>> index f7aaf8d23e20..cd38a7c166e6 100644
>>> --- a/include/uapi/linux/f2fs.h
>>> +++ b/include/uapi/linux/f2fs.h
>>> @@ -44,6 +44,8 @@
>>>    #define F2FS_IOC_COMPRESS_FILE		_IO(F2FS_IOCTL_MAGIC, 24)
>>>    #define F2FS_IOC_START_ATOMIC_REPLACE	_IO(F2FS_IOCTL_MAGIC, 25)
>>>    #define F2FS_IOC_GET_DEV_ALIAS_FILE	_IOR(F2FS_IOCTL_MAGIC, 26, __u32)
>>> +#define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
>>> +						struct f2fs_donate_range)
>>>    /*
>>>     * should be same as XFS_IOC_GOINGDOWN.
>>> @@ -97,4 +99,9 @@ struct f2fs_comp_option {
>>>    	__u8 log_cluster_size;
>>>    };
>>> +struct f2fs_donate_range {
>>> +	__u64 start;
>>> +	__u64 len;
>>> +};
>>> +
>>>    #endif /* _UAPI_LINUX_F2FS_H */


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [f2fs-dev] [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages
  2025-01-14 17:18     ` Jaegeuk Kim
@ 2025-01-15  2:17       ` Chao Yu
  0 siblings, 0 replies; 12+ messages in thread
From: Chao Yu @ 2025-01-15  2:17 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: chao, linux-kernel, linux-f2fs-devel

On 1/15/25 01:18, Jaegeuk Kim wrote:
> On 01/14, Chao Yu wrote:
>> On 1/14/25 02:39, Jaegeuk Kim via Linux-f2fs-devel wrote:
>>> 1. ioctl(fd1, F2FS_IOC_DONATE_RANGE, {0,3});
>>> 2. ioctl(fd2, F2FS_IOC_DONATE_RANGE, {1,2});
>>> 3. ioctl(fd3, F2FS_IOC_DONATE_RANGE, {3,1});
>>> 4. echo 3 > /sys/fs/f2fs/blk/donate_caches
>>>
>>> will reclaim 3 page cache ranges, registered by #1, #2, and #3.
>>>
>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>> ---
>>>    Documentation/ABI/testing/sysfs-fs-f2fs |  7 +++++++
>>>    fs/f2fs/f2fs.h                          |  4 ++++
>>>    fs/f2fs/shrinker.c                      | 27 +++++++++++++++++++++++++
>>>    fs/f2fs/sysfs.c                         |  8 ++++++++
>>>    4 files changed, 46 insertions(+)
>>>
>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
>>> index 3e1630c70d8a..6f9d8b8889fd 100644
>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
>>> @@ -828,3 +828,10 @@ Date:		November 2024
>>>    Contact:	"Chao Yu" <chao@kernel.org>
>>>    Description:	It controls max read extent count for per-inode, the value of threshold
>>>    		is 10240 by default.
>>> +
>>> +What:		/sys/fs/f2fs/<disk>/donate_caches
>>> +Date:		December 2024
>>> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>> +Description:	It reclaims the certain file-backed pages registered by
>>> +		ioctl(F2FS_IOC_DONATE_RANGE).
>>> +		For example, writing N tries to drop N address spaces in LRU.
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index 7ce3e3eab17a..6c434ae94cb1 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -1635,6 +1635,9 @@ struct f2fs_sb_info {
>>>    	unsigned int warm_data_age_threshold;
>>>    	unsigned int last_age_weight;
>>> +	/* control donate caches */
>>> +	unsigned int donate_caches;
>>> +
>>>    	/* basic filesystem units */
>>>    	unsigned int log_sectors_per_block;	/* log2 sectors per block */
>>>    	unsigned int log_blocksize;		/* log2 block size */
>>> @@ -4256,6 +4259,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
>>>    			struct shrink_control *sc);
>>>    unsigned long f2fs_shrink_scan(struct shrinker *shrink,
>>>    			struct shrink_control *sc);
>>> +void f2fs_donate_caches(struct f2fs_sb_info *sbi);
>>>    void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
>>>    void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
>>> diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
>>> index 83d6fb97dcae..a3e2063392a7 100644
>>> --- a/fs/f2fs/shrinker.c
>>> +++ b/fs/f2fs/shrinker.c
>>> @@ -130,6 +130,33 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
>>>    	return freed;
>>>    }
>>> +void f2fs_donate_caches(struct f2fs_sb_info *sbi)
>>> +{
>>> +	struct inode *inode = NULL;
>>> +	struct f2fs_inode_info *fi;
>>> +	int nfiles = sbi->donate_caches;
>>> +next:
>>> +	spin_lock(&sbi->inode_lock[DONATE_INODE]);
>>> +	if (list_empty(&sbi->inode_list[DONATE_INODE]) || !nfiles) {
>>> +		spin_unlock(&sbi->inode_lock[DONATE_INODE]);
>>> +		return;
>>> +	}
>>> +
>>> +	fi = list_first_entry(&sbi->inode_list[DONATE_INODE],
>>> +				struct f2fs_inode_info, gdonate_list);
>>> +	list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]);
>>
>> Not needed to drop it from the global list, right?
> 
> Yea, there're two paths to drop: 1) waiting for evict_inode, 2) setting a new
> range having len=0.

The second way just relocates the entry to the list tail; it does not drop it from the list, right?

Thanks,

> 
>>
>> Thanks,
>>
>>> +	inode = igrab(&fi->vfs_inode);
>>> +	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
>>> +
>>> +	if (inode) {
>>> +		invalidate_inode_pages2_range(inode->i_mapping,
>>> +			fi->donate_start, fi->donate_end);
>>> +		iput(inode);
>>> +	}
>>> +	if (nfiles--)
>>> +		goto next;
>>> +}
>>> +
>>>    void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
>>>    {
>>>    	spin_lock(&f2fs_list_lock);
>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>> index 6b99dc49f776..7570580ec3c0 100644
>>> --- a/fs/f2fs/sysfs.c
>>> +++ b/fs/f2fs/sysfs.c
>>> @@ -811,6 +811,12 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
>>>    		return count;
>>>    	}
>>> +	if (!strcmp(a->attr.name, "donate_caches")) {
>>> +		sbi->donate_caches = min(t, sbi->ndirty_inode[DONATE_INODE]);
>>> +		f2fs_donate_caches(sbi);
>>> +		return count;
>>> +	}
>>> +
>>>    	*ui = (unsigned int)t;
>>>    	return count;
>>> @@ -1030,6 +1036,7 @@ F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
>>>    F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
>>>    F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity);
>>>    F2FS_SBI_GENERAL_RW_ATTR(dir_level);
>>> +F2FS_SBI_GENERAL_RW_ATTR(donate_caches);
>>>    #ifdef CONFIG_F2FS_IOSTAT
>>>    F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
>>>    F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
>>> @@ -1178,6 +1185,7 @@ static struct attribute *f2fs_attrs[] = {
>>>    	ATTR_LIST(migration_granularity),
>>>    	ATTR_LIST(migration_window_granularity),
>>>    	ATTR_LIST(dir_level),
>>> +	ATTR_LIST(donate_caches),
>>>    	ATTR_LIST(ram_thresh),
>>>    	ATTR_LIST(ra_nid_pages),
>>>    	ATTR_LIST(dirty_nats_ratio),


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2 v2] f2fs: add a sysfs entry to request donate file-backed pages
  2025-01-31 22:27 ` [PATCH 2/2] f2fs: add a sysfs entry to request " Jaegeuk Kim
@ 2025-02-07 16:28   ` Jaegeuk Kim
  0 siblings, 0 replies; 12+ messages in thread
From: Jaegeuk Kim @ 2025-02-07 16:28 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel

1. ioctl(fd1, F2FS_IOC_DONATE_RANGE, {0,3});
2. ioctl(fd2, F2FS_IOC_DONATE_RANGE, {1,2});
3. ioctl(fd3, F2FS_IOC_DONATE_RANGE, {3,1});
4. echo 1024 > /sys/fs/f2fs/tuning/reclaim_caches_kb

This gives a way to reclaim file-backed pages by iterating all f2fs mounts until
reclaiming 1MB page cache ranges, registered by #1, #2, and #3.

5. cat /sys/fs/f2fs/tuning/reclaim_caches_kb
-> gives total number of registered file ranges.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---

 From v1:
   - Minor revision to clean up the flow.

 Documentation/ABI/testing/sysfs-fs-f2fs |  7 ++
 fs/f2fs/f2fs.h                          |  2 +
 fs/f2fs/shrinker.c                      | 90 +++++++++++++++++++++++++
 fs/f2fs/sysfs.c                         | 63 +++++++++++++++++
 4 files changed, 162 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 3e1630c70d8a..81deae2af84d 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -828,3 +828,10 @@ Date:		November 2024
 Contact:	"Chao Yu" <chao@kernel.org>
 Description:	It controls max read extent count for per-inode, the value of threshold
 		is 10240 by default.
+
+What:		/sys/fs/f2fs/tuning/reclaim_caches_kb
+Date:		February 2025
+Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:	It reclaims the given KBs of file-backed pages registered by
+		ioctl(F2FS_IOC_DONATE_RANGE).
+		For example, writing N tries to drop N KB of file-backed pages in LRU order.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 805585a7d2b6..bd0d8138b71d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4241,6 +4241,8 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
 			struct shrink_control *sc);
 unsigned long f2fs_shrink_scan(struct shrinker *shrink,
 			struct shrink_control *sc);
+unsigned int f2fs_donate_files(void);
+void f2fs_reclaim_caches(unsigned int reclaim_caches_kb);
 void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
 void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
 
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 83d6fb97dcae..45efff635d8e 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -130,6 +130,96 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
 	return freed;
 }
 
+unsigned int f2fs_donate_files(void)
+{
+	struct f2fs_sb_info *sbi;
+	struct list_head *p;
+	unsigned int donate_files = 0;
+
+	spin_lock(&f2fs_list_lock);
+	p = f2fs_list.next;
+	while (p != &f2fs_list) {
+		sbi = list_entry(p, struct f2fs_sb_info, s_list);
+
+		/* stop f2fs_put_super */
+		if (!mutex_trylock(&sbi->umount_mutex)) {
+			p = p->next;
+			continue;
+		}
+		spin_unlock(&f2fs_list_lock);
+
+		donate_files += sbi->donate_files;
+
+		spin_lock(&f2fs_list_lock);
+		p = p->next;
+		mutex_unlock(&sbi->umount_mutex);
+	}
+	spin_unlock(&f2fs_list_lock);
+
+	return donate_files;
+}
+
+static unsigned int do_reclaim_caches(struct f2fs_sb_info *sbi,
+				unsigned int reclaim_caches_kb)
+{
+	struct inode *inode;
+	struct f2fs_inode_info *fi;
+	unsigned int nfiles = sbi->donate_files;
+	pgoff_t npages = reclaim_caches_kb >> (PAGE_SHIFT - 10);
+
+	while (npages && nfiles--) {
+		pgoff_t len;
+
+		spin_lock(&sbi->inode_lock[DONATE_INODE]);
+		if (list_empty(&sbi->inode_list[DONATE_INODE])) {
+			spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+			break;
+		}
+		fi = list_first_entry(&sbi->inode_list[DONATE_INODE],
+					struct f2fs_inode_info, gdonate_list);
+		list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]);
+		inode = igrab(&fi->vfs_inode);
+		spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+
+		if (!inode)
+			continue;
+
+		len = fi->donate_end - fi->donate_start + 1;
+		npages = npages < len ? 0 : npages - len;
+		invalidate_inode_pages2_range(inode->i_mapping,
+					fi->donate_start, fi->donate_end);
+		iput(inode);
+		cond_resched();
+	}
+	return npages << (PAGE_SHIFT - 10);
+}
+
+void f2fs_reclaim_caches(unsigned int reclaim_caches_kb)
+{
+	struct f2fs_sb_info *sbi;
+	struct list_head *p;
+
+	spin_lock(&f2fs_list_lock);
+	p = f2fs_list.next;
+	while (p != &f2fs_list && reclaim_caches_kb) {
+		sbi = list_entry(p, struct f2fs_sb_info, s_list);
+
+		/* stop f2fs_put_super */
+		if (!mutex_trylock(&sbi->umount_mutex)) {
+			p = p->next;
+			continue;
+		}
+		spin_unlock(&f2fs_list_lock);
+
+		reclaim_caches_kb = do_reclaim_caches(sbi, reclaim_caches_kb);
+
+		spin_lock(&f2fs_list_lock);
+		p = p->next;
+		mutex_unlock(&sbi->umount_mutex);
+	}
+	spin_unlock(&f2fs_list_lock);
+}
+
 void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
 {
 	spin_lock(&f2fs_list_lock);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index b419555e1ea7..b27336acf519 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -916,6 +916,39 @@ static struct f2fs_base_attr f2fs_base_attr_##_name = {		\
 	.show	= f2fs_feature_show,				\
 }
 
+static ssize_t f2fs_tune_show(struct f2fs_base_attr *a, char *buf)
+{
+	unsigned int res = 0;
+
+	if (!strcmp(a->attr.name, "reclaim_caches_kb"))
+		res = f2fs_donate_files();
+
+	return sysfs_emit(buf, "%u\n", res);
+}
+
+static ssize_t f2fs_tune_store(struct f2fs_base_attr *a,
+			const char *buf, size_t count)
+{
+	unsigned long t;
+	int ret;
+
+	ret = kstrtoul(skip_spaces(buf), 0, &t);
+	if (ret)
+		return ret;
+
+	if (!strcmp(a->attr.name, "reclaim_caches_kb"))
+		f2fs_reclaim_caches(t);
+
+	return count;
+}
+
+#define F2FS_TUNE_RW_ATTR(_name)				\
+static struct f2fs_base_attr f2fs_base_attr_##_name = {		\
+	.attr = {.name = __stringify(_name), .mode = 0644 },	\
+	.show	= f2fs_tune_show,				\
+	.store	= f2fs_tune_store,				\
+}
+
 static ssize_t f2fs_sb_feature_show(struct f2fs_attr *a,
 		struct f2fs_sb_info *sbi, char *buf)
 {
@@ -1368,6 +1401,14 @@ static struct attribute *f2fs_sb_feat_attrs[] = {
 };
 ATTRIBUTE_GROUPS(f2fs_sb_feat);
 
+F2FS_TUNE_RW_ATTR(reclaim_caches_kb);
+
+static struct attribute *f2fs_tune_attrs[] = {
+	BASE_ATTR_LIST(reclaim_caches_kb),
+	NULL,
+};
+ATTRIBUTE_GROUPS(f2fs_tune);
+
 static const struct sysfs_ops f2fs_attr_ops = {
 	.show	= f2fs_attr_show,
 	.store	= f2fs_attr_store,
@@ -1401,6 +1442,20 @@ static struct kobject f2fs_feat = {
 	.kset	= &f2fs_kset,
 };
 
+static const struct sysfs_ops f2fs_tune_attr_ops = {
+	.show	= f2fs_base_attr_show,
+	.store	= f2fs_base_attr_store,
+};
+
+static const struct kobj_type f2fs_tune_ktype = {
+	.default_groups = f2fs_tune_groups,
+	.sysfs_ops	= &f2fs_tune_attr_ops,
+};
+
+static struct kobject f2fs_tune = {
+	.kset	= &f2fs_kset,
+};
+
 static ssize_t f2fs_stat_attr_show(struct kobject *kobj,
 				struct attribute *attr, char *buf)
 {
@@ -1637,6 +1692,11 @@ int __init f2fs_init_sysfs(void)
 	if (ret)
 		goto put_kobject;
 
+	ret = kobject_init_and_add(&f2fs_tune, &f2fs_tune_ktype,
+				   NULL, "tuning");
+	if (ret)
+		goto put_kobject;
+
 	f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
 	if (!f2fs_proc_root) {
 		ret = -ENOMEM;
@@ -1644,7 +1704,9 @@ int __init f2fs_init_sysfs(void)
 	}
 
 	return 0;
+
 put_kobject:
+	kobject_put(&f2fs_tune);
 	kobject_put(&f2fs_feat);
 	kset_unregister(&f2fs_kset);
 	return ret;
@@ -1652,6 +1714,7 @@ int __init f2fs_init_sysfs(void)
 
 void f2fs_exit_sysfs(void)
 {
+	kobject_put(&f2fs_tune);
 	kobject_put(&f2fs_feat);
 	kset_unregister(&f2fs_kset);
 	remove_proc_entry("fs/f2fs", NULL);
-- 
2.48.1.502.g6dc24dfdaf-goog


^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2025-02-07 16:28 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-01-13 18:39 [PATCH 1/2] f2fs: register inodes which is able to donate pages Jaegeuk Kim
2025-01-13 18:39 ` [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages Jaegeuk Kim
2025-01-14  7:34   ` [f2fs-dev] " Chao Yu
2025-01-14 17:18     ` Jaegeuk Kim
2025-01-15  2:17       ` Chao Yu
2025-01-14 20:50   ` [PATCH 2/2 v2] " Jaegeuk Kim
2025-01-14  6:34 ` [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to donate pages Chao Yu
2025-01-14 17:15   ` Jaegeuk Kim
2025-01-15  2:12     ` Chao Yu
2025-01-14 17:20 ` [PATCH 1/2 v2] " Jaegeuk Kim
2025-01-14 21:16 ` [PATCH 1/2] " Eric Biggers
  -- strict thread matches above, loose matches on Subject: below --
2025-01-31 22:27 [PATCH 0/2 v8] add ioctl/sysfs to donate file-backed pages Jaegeuk Kim
2025-01-31 22:27 ` [PATCH 2/2] f2fs: add a sysfs entry to request " Jaegeuk Kim
2025-02-07 16:28   ` [PATCH 2/2 v2] " Jaegeuk Kim

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).