* [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages
2025-01-13 18:39 [PATCH 1/2] f2fs: register inodes which is able to donate pages Jaegeuk Kim
@ 2025-01-13 18:39 ` Jaegeuk Kim
2025-01-14 7:34 ` [f2fs-dev] " Chao Yu
2025-01-14 20:50 ` [PATCH 2/2 v2] " Jaegeuk Kim
2025-01-14 6:34 ` [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to donate pages Chao Yu
` (2 subsequent siblings)
3 siblings, 2 replies; 12+ messages in thread
From: Jaegeuk Kim @ 2025-01-13 18:39 UTC (permalink / raw)
To: linux-kernel, linux-f2fs-devel; +Cc: Jaegeuk Kim
1. ioctl(fd1, F2FS_IOC_DONATE_RANGE, {0,3});
2. ioctl(fd2, F2FS_IOC_DONATE_RANGE, {1,2});
3. ioctl(fd3, F2FS_IOC_DONATE_RANGE, {3,1});
4. echo 3 > /sys/fs/f2fs/blk/donate_caches
will reclaim 3 page cache ranges, registered by #1, #2, and #3.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Documentation/ABI/testing/sysfs-fs-f2fs | 7 +++++++
fs/f2fs/f2fs.h | 4 ++++
fs/f2fs/shrinker.c | 27 +++++++++++++++++++++++++
fs/f2fs/sysfs.c | 8 ++++++++
4 files changed, 46 insertions(+)
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 3e1630c70d8a..6f9d8b8889fd 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -828,3 +828,10 @@ Date: November 2024
Contact: "Chao Yu" <chao@kernel.org>
Description: It controls max read extent count for per-inode, the value of threshold
is 10240 by default.
+
+What: /sys/fs/f2fs/<disk>/donate_caches
+Date: December 2024
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: It reclaims certain file-backed pages registered by
+ ioctl(F2FS_IOC_DONATE_RANGE).
+ For example, writing N tries to drop N address spaces in LRU.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7ce3e3eab17a..6c434ae94cb1 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1635,6 +1635,9 @@ struct f2fs_sb_info {
unsigned int warm_data_age_threshold;
unsigned int last_age_weight;
+ /* control donate caches */
+ unsigned int donate_caches;
+
/* basic filesystem units */
unsigned int log_sectors_per_block; /* log2 sectors per block */
unsigned int log_blocksize; /* log2 block size */
@@ -4256,6 +4259,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
struct shrink_control *sc);
unsigned long f2fs_shrink_scan(struct shrinker *shrink,
struct shrink_control *sc);
+void f2fs_donate_caches(struct f2fs_sb_info *sbi);
void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 83d6fb97dcae..a3e2063392a7 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -130,6 +130,33 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
return freed;
}
+void f2fs_donate_caches(struct f2fs_sb_info *sbi)
+{
+ struct inode *inode = NULL;
+ struct f2fs_inode_info *fi;
+ int nfiles = sbi->donate_caches;
+next:
+ spin_lock(&sbi->inode_lock[DONATE_INODE]);
+ if (list_empty(&sbi->inode_list[DONATE_INODE]) || !nfiles) {
+ spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+ return;
+ }
+
+ fi = list_first_entry(&sbi->inode_list[DONATE_INODE],
+ struct f2fs_inode_info, gdonate_list);
+ list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]);
+ inode = igrab(&fi->vfs_inode);
+ spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+
+ if (inode) {
+ invalidate_inode_pages2_range(inode->i_mapping,
+ fi->donate_start, fi->donate_end);
+ iput(inode);
+ }
+ if (nfiles--)
+ goto next;
+}
+
void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
{
spin_lock(&f2fs_list_lock);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 6b99dc49f776..7570580ec3c0 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -811,6 +811,12 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
return count;
}
+ if (!strcmp(a->attr.name, "donate_caches")) {
+ sbi->donate_caches = min(t, sbi->ndirty_inode[DONATE_INODE]);
+ f2fs_donate_caches(sbi);
+ return count;
+ }
+
*ui = (unsigned int)t;
return count;
@@ -1030,6 +1036,7 @@ F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity);
F2FS_SBI_GENERAL_RW_ATTR(dir_level);
+F2FS_SBI_GENERAL_RW_ATTR(donate_caches);
#ifdef CONFIG_F2FS_IOSTAT
F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
@@ -1178,6 +1185,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(migration_granularity),
ATTR_LIST(migration_window_granularity),
ATTR_LIST(dir_level),
+ ATTR_LIST(donate_caches),
ATTR_LIST(ram_thresh),
ATTR_LIST(ra_nid_pages),
ATTR_LIST(dirty_nats_ratio),
--
2.47.1.688.g23fc6f90ad-goog
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [f2fs-dev] [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages
2025-01-13 18:39 ` [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages Jaegeuk Kim
@ 2025-01-14 7:34 ` Chao Yu
2025-01-14 17:18 ` Jaegeuk Kim
2025-01-14 20:50 ` [PATCH 2/2 v2] " Jaegeuk Kim
1 sibling, 1 reply; 12+ messages in thread
From: Chao Yu @ 2025-01-14 7:34 UTC (permalink / raw)
To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel; +Cc: chao
On 1/14/25 02:39, Jaegeuk Kim via Linux-f2fs-devel wrote:
> 1. ioctl(fd1, F2FS_IOC_DONATE_RANGE, {0,3});
> 2. ioctl(fd2, F2FS_IOC_DONATE_RANGE, {1,2});
> 3. ioctl(fd3, F2FS_IOC_DONATE_RANGE, {3,1});
> 4. echo 3 > /sys/fs/f2fs/blk/donate_caches
>
> will reclaim 3 page cache ranges, registered by #1, #2, and #3.
>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
> Documentation/ABI/testing/sysfs-fs-f2fs | 7 +++++++
> fs/f2fs/f2fs.h | 4 ++++
> fs/f2fs/shrinker.c | 27 +++++++++++++++++++++++++
> fs/f2fs/sysfs.c | 8 ++++++++
> 4 files changed, 46 insertions(+)
>
> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> index 3e1630c70d8a..6f9d8b8889fd 100644
> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> @@ -828,3 +828,10 @@ Date: November 2024
> Contact: "Chao Yu" <chao@kernel.org>
> Description: It controls max read extent count for per-inode, the value of threshold
> is 10240 by default.
> +
> +What: /sys/fs/f2fs/<disk>/donate_caches
> +Date: December 2024
> +Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
> +Description: It reclaims the certian file-backed pages registered by
> + ioctl(F2FS_IOC_DONATE_RANGE).
> + For example, writing N tries to drop N address spaces in LRU.
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 7ce3e3eab17a..6c434ae94cb1 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1635,6 +1635,9 @@ struct f2fs_sb_info {
> unsigned int warm_data_age_threshold;
> unsigned int last_age_weight;
>
> + /* control donate caches */
> + unsigned int donate_caches;
> +
> /* basic filesystem units */
> unsigned int log_sectors_per_block; /* log2 sectors per block */
> unsigned int log_blocksize; /* log2 block size */
> @@ -4256,6 +4259,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
> struct shrink_control *sc);
> unsigned long f2fs_shrink_scan(struct shrinker *shrink,
> struct shrink_control *sc);
> +void f2fs_donate_caches(struct f2fs_sb_info *sbi);
> void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
> void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
>
> diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
> index 83d6fb97dcae..a3e2063392a7 100644
> --- a/fs/f2fs/shrinker.c
> +++ b/fs/f2fs/shrinker.c
> @@ -130,6 +130,33 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
> return freed;
> }
>
> +void f2fs_donate_caches(struct f2fs_sb_info *sbi)
> +{
> + struct inode *inode = NULL;
> + struct f2fs_inode_info *fi;
> + int nfiles = sbi->donate_caches;
> +next:
> + spin_lock(&sbi->inode_lock[DONATE_INODE]);
> + if (list_empty(&sbi->inode_list[DONATE_INODE]) || !nfiles) {
> + spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> + return;
> + }
> +
> + fi = list_first_entry(&sbi->inode_list[DONATE_INODE],
> + struct f2fs_inode_info, gdonate_list);
> + list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]);
So it doesn't need to be dropped from the global list here, right?
Thanks,
> + inode = igrab(&fi->vfs_inode);
> + spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> +
> + if (inode) {
> + invalidate_inode_pages2_range(inode->i_mapping,
> + fi->donate_start, fi->donate_end);
> + iput(inode);
> + }
> + if (nfiles--)
> + goto next;
> +}
> +
> void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
> {
> spin_lock(&f2fs_list_lock);
> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> index 6b99dc49f776..7570580ec3c0 100644
> --- a/fs/f2fs/sysfs.c
> +++ b/fs/f2fs/sysfs.c
> @@ -811,6 +811,12 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
> return count;
> }
>
> + if (!strcmp(a->attr.name, "donate_caches")) {
> + sbi->donate_caches = min(t, sbi->ndirty_inode[DONATE_INODE]);
> + f2fs_donate_caches(sbi);
> + return count;
> + }
> +
> *ui = (unsigned int)t;
>
> return count;
> @@ -1030,6 +1036,7 @@ F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
> F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
> F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity);
> F2FS_SBI_GENERAL_RW_ATTR(dir_level);
> +F2FS_SBI_GENERAL_RW_ATTR(donate_caches);
> #ifdef CONFIG_F2FS_IOSTAT
> F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
> F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
> @@ -1178,6 +1185,7 @@ static struct attribute *f2fs_attrs[] = {
> ATTR_LIST(migration_granularity),
> ATTR_LIST(migration_window_granularity),
> ATTR_LIST(dir_level),
> + ATTR_LIST(donate_caches),
> ATTR_LIST(ram_thresh),
> ATTR_LIST(ra_nid_pages),
> ATTR_LIST(dirty_nats_ratio),
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [f2fs-dev] [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages
2025-01-14 7:34 ` [f2fs-dev] " Chao Yu
@ 2025-01-14 17:18 ` Jaegeuk Kim
2025-01-15 2:17 ` Chao Yu
0 siblings, 1 reply; 12+ messages in thread
From: Jaegeuk Kim @ 2025-01-14 17:18 UTC (permalink / raw)
To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel
On 01/14, Chao Yu wrote:
> On 1/14/25 02:39, Jaegeuk Kim via Linux-f2fs-devel wrote:
> > 1. ioctl(fd1, F2FS_IOC_DONATE_RANGE, {0,3});
> > 2. ioctl(fd2, F2FS_IOC_DONATE_RANGE, {1,2});
> > 3. ioctl(fd3, F2FS_IOC_DONATE_RANGE, {3,1});
> > 4. echo 3 > /sys/fs/f2fs/blk/donate_caches
> >
> > will reclaim 3 page cache ranges, registered by #1, #2, and #3.
> >
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> > Documentation/ABI/testing/sysfs-fs-f2fs | 7 +++++++
> > fs/f2fs/f2fs.h | 4 ++++
> > fs/f2fs/shrinker.c | 27 +++++++++++++++++++++++++
> > fs/f2fs/sysfs.c | 8 ++++++++
> > 4 files changed, 46 insertions(+)
> >
> > diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> > index 3e1630c70d8a..6f9d8b8889fd 100644
> > --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> > +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> > @@ -828,3 +828,10 @@ Date: November 2024
> > Contact: "Chao Yu" <chao@kernel.org>
> > Description: It controls max read extent count for per-inode, the value of threshold
> > is 10240 by default.
> > +
> > +What: /sys/fs/f2fs/<disk>/donate_caches
> > +Date: December 2024
> > +Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
> > +Description: It reclaims the certian file-backed pages registered by
> > + ioctl(F2FS_IOC_DONATE_RANGE).
> > + For example, writing N tries to drop N address spaces in LRU.
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 7ce3e3eab17a..6c434ae94cb1 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -1635,6 +1635,9 @@ struct f2fs_sb_info {
> > unsigned int warm_data_age_threshold;
> > unsigned int last_age_weight;
> > + /* control donate caches */
> > + unsigned int donate_caches;
> > +
> > /* basic filesystem units */
> > unsigned int log_sectors_per_block; /* log2 sectors per block */
> > unsigned int log_blocksize; /* log2 block size */
> > @@ -4256,6 +4259,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
> > struct shrink_control *sc);
> > unsigned long f2fs_shrink_scan(struct shrinker *shrink,
> > struct shrink_control *sc);
> > +void f2fs_donate_caches(struct f2fs_sb_info *sbi);
> > void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
> > void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
> > diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
> > index 83d6fb97dcae..a3e2063392a7 100644
> > --- a/fs/f2fs/shrinker.c
> > +++ b/fs/f2fs/shrinker.c
> > @@ -130,6 +130,33 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
> > return freed;
> > }
> > +void f2fs_donate_caches(struct f2fs_sb_info *sbi)
> > +{
> > + struct inode *inode = NULL;
> > + struct f2fs_inode_info *fi;
> > + int nfiles = sbi->donate_caches;
> > +next:
> > + spin_lock(&sbi->inode_lock[DONATE_INODE]);
> > + if (list_empty(&sbi->inode_list[DONATE_INODE]) || !nfiles) {
> > + spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> > + return;
> > + }
> > +
> > + fi = list_first_entry(&sbi->inode_list[DONATE_INODE],
> > + struct f2fs_inode_info, gdonate_list);
> > + list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]);
>
> Not needed to drop it from the global list, right?
Yes, there are two paths that drop it: 1) waiting for evict_inode, and
2) setting a new range with len=0.
>
> Thanks,
>
> > + inode = igrab(&fi->vfs_inode);
> > + spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> > +
> > + if (inode) {
> > + invalidate_inode_pages2_range(inode->i_mapping,
> > + fi->donate_start, fi->donate_end);
> > + iput(inode);
> > + }
> > + if (nfiles--)
> > + goto next;
> > +}
> > +
> > void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
> > {
> > spin_lock(&f2fs_list_lock);
> > diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> > index 6b99dc49f776..7570580ec3c0 100644
> > --- a/fs/f2fs/sysfs.c
> > +++ b/fs/f2fs/sysfs.c
> > @@ -811,6 +811,12 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
> > return count;
> > }
> > + if (!strcmp(a->attr.name, "donate_caches")) {
> > + sbi->donate_caches = min(t, sbi->ndirty_inode[DONATE_INODE]);
> > + f2fs_donate_caches(sbi);
> > + return count;
> > + }
> > +
> > *ui = (unsigned int)t;
> > return count;
> > @@ -1030,6 +1036,7 @@ F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
> > F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
> > F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity);
> > F2FS_SBI_GENERAL_RW_ATTR(dir_level);
> > +F2FS_SBI_GENERAL_RW_ATTR(donate_caches);
> > #ifdef CONFIG_F2FS_IOSTAT
> > F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
> > F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
> > @@ -1178,6 +1185,7 @@ static struct attribute *f2fs_attrs[] = {
> > ATTR_LIST(migration_granularity),
> > ATTR_LIST(migration_window_granularity),
> > ATTR_LIST(dir_level),
> > + ATTR_LIST(donate_caches),
> > ATTR_LIST(ram_thresh),
> > ATTR_LIST(ra_nid_pages),
> > ATTR_LIST(dirty_nats_ratio),
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [f2fs-dev] [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages
2025-01-14 17:18 ` Jaegeuk Kim
@ 2025-01-15 2:17 ` Chao Yu
0 siblings, 0 replies; 12+ messages in thread
From: Chao Yu @ 2025-01-15 2:17 UTC (permalink / raw)
To: Jaegeuk Kim; +Cc: chao, linux-kernel, linux-f2fs-devel
On 1/15/25 01:18, Jaegeuk Kim wrote:
> On 01/14, Chao Yu wrote:
>> On 1/14/25 02:39, Jaegeuk Kim via Linux-f2fs-devel wrote:
>>> 1. ioctl(fd1, F2FS_IOC_DONATE_RANGE, {0,3});
>>> 2. ioctl(fd2, F2FS_IOC_DONATE_RANGE, {1,2});
>>> 3. ioctl(fd3, F2FS_IOC_DONATE_RANGE, {3,1});
>>> 4. echo 3 > /sys/fs/f2fs/blk/donate_caches
>>>
>>> will reclaim 3 page cache ranges, registered by #1, #2, and #3.
>>>
>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>> ---
>>> Documentation/ABI/testing/sysfs-fs-f2fs | 7 +++++++
>>> fs/f2fs/f2fs.h | 4 ++++
>>> fs/f2fs/shrinker.c | 27 +++++++++++++++++++++++++
>>> fs/f2fs/sysfs.c | 8 ++++++++
>>> 4 files changed, 46 insertions(+)
>>>
>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
>>> index 3e1630c70d8a..6f9d8b8889fd 100644
>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
>>> @@ -828,3 +828,10 @@ Date: November 2024
>>> Contact: "Chao Yu" <chao@kernel.org>
>>> Description: It controls max read extent count for per-inode, the value of threshold
>>> is 10240 by default.
>>> +
>>> +What: /sys/fs/f2fs/<disk>/donate_caches
>>> +Date: December 2024
>>> +Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
>>> +Description: It reclaims the certian file-backed pages registered by
>>> + ioctl(F2FS_IOC_DONATE_RANGE).
>>> + For example, writing N tries to drop N address spaces in LRU.
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index 7ce3e3eab17a..6c434ae94cb1 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -1635,6 +1635,9 @@ struct f2fs_sb_info {
>>> unsigned int warm_data_age_threshold;
>>> unsigned int last_age_weight;
>>> + /* control donate caches */
>>> + unsigned int donate_caches;
>>> +
>>> /* basic filesystem units */
>>> unsigned int log_sectors_per_block; /* log2 sectors per block */
>>> unsigned int log_blocksize; /* log2 block size */
>>> @@ -4256,6 +4259,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
>>> struct shrink_control *sc);
>>> unsigned long f2fs_shrink_scan(struct shrinker *shrink,
>>> struct shrink_control *sc);
>>> +void f2fs_donate_caches(struct f2fs_sb_info *sbi);
>>> void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
>>> void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
>>> diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
>>> index 83d6fb97dcae..a3e2063392a7 100644
>>> --- a/fs/f2fs/shrinker.c
>>> +++ b/fs/f2fs/shrinker.c
>>> @@ -130,6 +130,33 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
>>> return freed;
>>> }
>>> +void f2fs_donate_caches(struct f2fs_sb_info *sbi)
>>> +{
>>> + struct inode *inode = NULL;
>>> + struct f2fs_inode_info *fi;
>>> + int nfiles = sbi->donate_caches;
>>> +next:
>>> + spin_lock(&sbi->inode_lock[DONATE_INODE]);
>>> + if (list_empty(&sbi->inode_list[DONATE_INODE]) || !nfiles) {
>>> + spin_unlock(&sbi->inode_lock[DONATE_INODE]);
>>> + return;
>>> + }
>>> +
>>> + fi = list_first_entry(&sbi->inode_list[DONATE_INODE],
>>> + struct f2fs_inode_info, gdonate_list);
>>> + list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]);
>>
>> Not needed to drop it from the global list, right?
>
> Yea, there're two paths to drop: 1) waiting for evict_inode, 2) setting a new
> range having len=0.
Doesn't the second way just relocate the entry to the list tail, rather than dropping it from the list?
Thanks,
>
>>
>> Thanks,
>>
>>> + inode = igrab(&fi->vfs_inode);
>>> + spin_unlock(&sbi->inode_lock[DONATE_INODE]);
>>> +
>>> + if (inode) {
>>> + invalidate_inode_pages2_range(inode->i_mapping,
>>> + fi->donate_start, fi->donate_end);
>>> + iput(inode);
>>> + }
>>> + if (nfiles--)
>>> + goto next;
>>> +}
>>> +
>>> void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
>>> {
>>> spin_lock(&f2fs_list_lock);
>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>> index 6b99dc49f776..7570580ec3c0 100644
>>> --- a/fs/f2fs/sysfs.c
>>> +++ b/fs/f2fs/sysfs.c
>>> @@ -811,6 +811,12 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
>>> return count;
>>> }
>>> + if (!strcmp(a->attr.name, "donate_caches")) {
>>> + sbi->donate_caches = min(t, sbi->ndirty_inode[DONATE_INODE]);
>>> + f2fs_donate_caches(sbi);
>>> + return count;
>>> + }
>>> +
>>> *ui = (unsigned int)t;
>>> return count;
>>> @@ -1030,6 +1036,7 @@ F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
>>> F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
>>> F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity);
>>> F2FS_SBI_GENERAL_RW_ATTR(dir_level);
>>> +F2FS_SBI_GENERAL_RW_ATTR(donate_caches);
>>> #ifdef CONFIG_F2FS_IOSTAT
>>> F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
>>> F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
>>> @@ -1178,6 +1185,7 @@ static struct attribute *f2fs_attrs[] = {
>>> ATTR_LIST(migration_granularity),
>>> ATTR_LIST(migration_window_granularity),
>>> ATTR_LIST(dir_level),
>>> + ATTR_LIST(donate_caches),
>>> ATTR_LIST(ram_thresh),
>>> ATTR_LIST(ra_nid_pages),
>>> ATTR_LIST(dirty_nats_ratio),
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2 v2] f2fs: add a sysfs entry to request donate file-backed pages
2025-01-13 18:39 ` [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages Jaegeuk Kim
2025-01-14 7:34 ` [f2fs-dev] " Chao Yu
@ 2025-01-14 20:50 ` Jaegeuk Kim
1 sibling, 0 replies; 12+ messages in thread
From: Jaegeuk Kim @ 2025-01-14 20:50 UTC (permalink / raw)
To: linux-kernel, linux-f2fs-devel
1. ioctl(fd1, F2FS_IOC_DONATE_RANGE, {0,3});
2. ioctl(fd2, F2FS_IOC_DONATE_RANGE, {1,2});
3. ioctl(fd3, F2FS_IOC_DONATE_RANGE, {3,1});
4. echo 3 > /sys/fs/f2fs/blk/donate_caches
will reclaim 3 page cache ranges, registered by #1, #2, and #3.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Change log from v1:
- don't use sbi->ndirty_inode which is not defined by default
Documentation/ABI/testing/sysfs-fs-f2fs | 7 +++++++
fs/f2fs/f2fs.h | 4 ++++
fs/f2fs/shrinker.c | 27 +++++++++++++++++++++++++
fs/f2fs/sysfs.c | 8 ++++++++
4 files changed, 46 insertions(+)
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 3e1630c70d8a..6f9d8b8889fd 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -828,3 +828,10 @@ Date: November 2024
Contact: "Chao Yu" <chao@kernel.org>
Description: It controls max read extent count for per-inode, the value of threshold
is 10240 by default.
+
+What: /sys/fs/f2fs/<disk>/donate_caches
+Date: December 2024
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: It reclaims certain file-backed pages registered by
+ ioctl(F2FS_IOC_DONATE_RANGE).
+ For example, writing N tries to drop N address spaces in LRU.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7ce3e3eab17a..6c434ae94cb1 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1635,6 +1635,9 @@ struct f2fs_sb_info {
unsigned int warm_data_age_threshold;
unsigned int last_age_weight;
+ /* control donate caches */
+ unsigned int donate_caches;
+
/* basic filesystem units */
unsigned int log_sectors_per_block; /* log2 sectors per block */
unsigned int log_blocksize; /* log2 block size */
@@ -4256,6 +4259,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
struct shrink_control *sc);
unsigned long f2fs_shrink_scan(struct shrinker *shrink,
struct shrink_control *sc);
+void f2fs_donate_caches(struct f2fs_sb_info *sbi);
void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 83d6fb97dcae..a3e2063392a7 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -130,6 +130,33 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
return freed;
}
+void f2fs_donate_caches(struct f2fs_sb_info *sbi)
+{
+ struct inode *inode = NULL;
+ struct f2fs_inode_info *fi;
+ int nfiles = sbi->donate_caches;
+next:
+ spin_lock(&sbi->inode_lock[DONATE_INODE]);
+ if (list_empty(&sbi->inode_list[DONATE_INODE]) || !nfiles) {
+ spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+ return;
+ }
+
+ fi = list_first_entry(&sbi->inode_list[DONATE_INODE],
+ struct f2fs_inode_info, gdonate_list);
+ list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]);
+ inode = igrab(&fi->vfs_inode);
+ spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+
+ if (inode) {
+ invalidate_inode_pages2_range(inode->i_mapping,
+ fi->donate_start, fi->donate_end);
+ iput(inode);
+ }
+ if (nfiles--)
+ goto next;
+}
+
void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
{
spin_lock(&f2fs_list_lock);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 6b99dc49f776..2a6b01257ad8 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -811,6 +811,12 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
return count;
}
+ if (!strcmp(a->attr.name, "donate_caches")) {
+ sbi->donate_caches = t;
+ f2fs_donate_caches(sbi);
+ return count;
+ }
+
*ui = (unsigned int)t;
return count;
@@ -1030,6 +1036,7 @@ F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity);
F2FS_SBI_GENERAL_RW_ATTR(dir_level);
+F2FS_SBI_GENERAL_RW_ATTR(donate_caches);
#ifdef CONFIG_F2FS_IOSTAT
F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
@@ -1178,6 +1185,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(migration_granularity),
ATTR_LIST(migration_window_granularity),
ATTR_LIST(dir_level),
+ ATTR_LIST(donate_caches),
ATTR_LIST(ram_thresh),
ATTR_LIST(ra_nid_pages),
ATTR_LIST(dirty_nats_ratio),
--
2.48.0.rc2.279.g1de40edade-goog
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to donate pages
2025-01-13 18:39 [PATCH 1/2] f2fs: register inodes which is able to donate pages Jaegeuk Kim
2025-01-13 18:39 ` [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages Jaegeuk Kim
@ 2025-01-14 6:34 ` Chao Yu
2025-01-14 17:15 ` Jaegeuk Kim
2025-01-14 17:20 ` [PATCH 1/2 v2] " Jaegeuk Kim
2025-01-14 21:16 ` [PATCH 1/2] " Eric Biggers
3 siblings, 1 reply; 12+ messages in thread
From: Chao Yu @ 2025-01-14 6:34 UTC (permalink / raw)
To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel; +Cc: chao
On 1/14/25 02:39, Jaegeuk Kim via Linux-f2fs-devel wrote:
> This patch introduces an inode list to keep the page cache ranges that users
> can donate pages together.
>
> #define F2FS_IOC_DONATE_RANGE _IOW(F2FS_IOCTL_MAGIC, 27, \
> struct f2fs_donate_range)
> struct f2fs_donate_range {
> __u64 start;
> __u64 len;
> };
>
> e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);
I guess we need to add documentation for all ioctls including this one, maybe
later? :)
>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
> fs/f2fs/debug.c | 3 +++
> fs/f2fs/f2fs.h | 9 +++++++-
> fs/f2fs/file.c | 48 +++++++++++++++++++++++++++++++++++++++
> fs/f2fs/inode.c | 14 ++++++++++++
> fs/f2fs/super.c | 1 +
> include/uapi/linux/f2fs.h | 7 ++++++
> 6 files changed, 81 insertions(+), 1 deletion(-)
>
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index 468828288a4a..1b099c123670 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -164,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
> si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
> si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
> si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
> + si->ndonate_files = sbi->ndirty_inode[DONATE_INODE];
> si->nquota_files = sbi->nquota_files;
> si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
> si->aw_cnt = atomic_read(&sbi->atomic_files);
> @@ -501,6 +502,8 @@ static int stat_show(struct seq_file *s, void *v)
> si->compr_inode, si->compr_blocks);
> seq_printf(s, " - Swapfile Inode: %u\n",
> si->swapfile_inode);
> + seq_printf(s, " - Donate Inode: %d\n",
Please use %u instead of %d, since si->ndonate_files is of type unsigned int.
> + si->ndonate_files);
> seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
> si->orphans, si->append, si->update);
> seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 4bfe162eefd3..7ce3e3eab17a 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -850,6 +850,11 @@ struct f2fs_inode_info {
> #endif
> struct list_head dirty_list; /* dirty list for dirs and files */
> struct list_head gdirty_list; /* linked in global dirty list */
> +
> + /* linked in global inode list for cache donation */
> + struct list_head gdonate_list;
> + loff_t donate_start, donate_end; /* inclusive */
> +
> struct task_struct *atomic_write_task; /* store atomic write task */
> struct extent_tree *extent_tree[NR_EXTENT_CACHES];
> /* cached extent_tree entry */
> @@ -1274,6 +1279,7 @@ enum inode_type {
> DIR_INODE, /* for dirty dir inode */
> FILE_INODE, /* for dirty regular/symlink inode */
> DIRTY_META, /* for all dirtied inode metadata */
> + DONATE_INODE, /* for all inode to donate pages */
> NR_INODE_TYPE,
> };
>
> @@ -3984,7 +3990,8 @@ struct f2fs_stat_info {
> unsigned long long allocated_data_blocks;
> int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
> int ndirty_data, ndirty_qdata;
> - unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
> + unsigned int ndirty_dirs, ndirty_files, ndirty_all;
> + unsigned int nquota_files, ndonate_files;
> int nats, dirty_nats, sits, dirty_sits;
> int free_nids, avail_nids, alloc_nids;
> int total_count, utilization;
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 9980d17ef9f5..d6dea6258c2d 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -2493,6 +2493,51 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> return ret;
> }
>
> +static int f2fs_ioc_donate_range(struct file *filp, unsigned long arg)
> +{
> + struct inode *inode = file_inode(filp);
> + struct mnt_idmap *idmap = file_mnt_idmap(filp);
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> + struct f2fs_donate_range range;
> + int ret;
> +
> + if (copy_from_user(&range, (struct f2fs_donate_range __user *)arg,
> + sizeof(range)))
> + return -EFAULT;
What about doing a sanity check on the donate range here, in order to avoid
overflow during the fi->donate_end calculation?
F2FS_I(inode)->donate_end = range.start + range.len - 1;
> +
> + if (!inode_owner_or_capable(idmap, inode))
> + return -EACCES;
> +
> + if (!S_ISREG(inode->i_mode))
> + return -EINVAL;
> +
> + ret = mnt_want_write_file(filp);
> + if (ret)
> + return ret;
> +
> + inode_lock(inode);
> +
> + if (f2fs_is_atomic_file(inode))
> + goto out;
> +
> + spin_lock(&sbi->inode_lock[DONATE_INODE]);
> + if (list_empty(&F2FS_I(inode)->gdonate_list)) {
> + list_add_tail(&F2FS_I(inode)->gdonate_list,
> + &sbi->inode_list[DONATE_INODE]);
> + stat_inc_dirty_inode(sbi, DONATE_INODE);
> + } else {
> + list_move_tail(&F2FS_I(inode)->gdonate_list,
> + &sbi->inode_list[DONATE_INODE]);
> + }
> + F2FS_I(inode)->donate_start = range.start;
> + F2FS_I(inode)->donate_end = range.start + range.len - 1;
> + spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> +out:
> + inode_unlock(inode);
> + mnt_drop_write_file(filp);
> + return ret;
> +}
> +
> static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
> {
> struct inode *inode = file_inode(filp);
> @@ -4522,6 +4567,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> return -EOPNOTSUPP;
> case F2FS_IOC_SHUTDOWN:
> return f2fs_ioc_shutdown(filp, arg);
> + case F2FS_IOC_DONATE_RANGE:
> + return f2fs_ioc_donate_range(filp, arg);
> case FITRIM:
> return f2fs_ioc_fitrim(filp, arg);
> case FS_IOC_SET_ENCRYPTION_POLICY:
> @@ -5273,6 +5320,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> case F2FS_IOC_RELEASE_VOLATILE_WRITE:
> case F2FS_IOC_ABORT_ATOMIC_WRITE:
> case F2FS_IOC_SHUTDOWN:
> + case F2FS_IOC_DONATE_RANGE:
> case FITRIM:
> case FS_IOC_SET_ENCRYPTION_POLICY:
> case FS_IOC_GET_ENCRYPTION_PWSALT:
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index 7de33da8b3ea..e38dc5fe2f2e 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -804,6 +804,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
> return 0;
> }
>
> +static void f2fs_remove_donate_inode(struct inode *inode)
> +{
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +
> + if (list_empty(&F2FS_I(inode)->gdonate_list))
Would it be safer to access gdonate_list while holding inode_lock[DONATE_INODE]?
Thanks,
> + return;
> +
> + spin_lock(&sbi->inode_lock[DONATE_INODE]);
> + list_del_init(&F2FS_I(inode)->gdonate_list);
> + stat_dec_dirty_inode(sbi, DONATE_INODE);
> + spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> +}
> +
> /*
> * Called at the last iput() if i_nlink is zero
> */
> @@ -838,6 +851,7 @@ void f2fs_evict_inode(struct inode *inode)
>
> f2fs_bug_on(sbi, get_dirty_pages(inode));
> f2fs_remove_dirty_inode(inode);
> + f2fs_remove_donate_inode(inode);
>
> if (!IS_DEVICE_ALIASING(inode))
> f2fs_destroy_extent_tree(inode);
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index fc7d463dee15..ef639a6d82e5 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1441,6 +1441,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
> spin_lock_init(&fi->i_size_lock);
> INIT_LIST_HEAD(&fi->dirty_list);
> INIT_LIST_HEAD(&fi->gdirty_list);
> + INIT_LIST_HEAD(&fi->gdonate_list);
> init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
> init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
> init_f2fs_rwsem(&fi->i_xattr_sem);
> diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
> index f7aaf8d23e20..cd38a7c166e6 100644
> --- a/include/uapi/linux/f2fs.h
> +++ b/include/uapi/linux/f2fs.h
> @@ -44,6 +44,8 @@
> #define F2FS_IOC_COMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 24)
> #define F2FS_IOC_START_ATOMIC_REPLACE _IO(F2FS_IOCTL_MAGIC, 25)
> #define F2FS_IOC_GET_DEV_ALIAS_FILE _IOR(F2FS_IOCTL_MAGIC, 26, __u32)
> +#define F2FS_IOC_DONATE_RANGE _IOW(F2FS_IOCTL_MAGIC, 27, \
> + struct f2fs_donate_range)
>
> /*
> * should be same as XFS_IOC_GOINGDOWN.
> @@ -97,4 +99,9 @@ struct f2fs_comp_option {
> __u8 log_cluster_size;
> };
>
> +struct f2fs_donate_range {
> + __u64 start;
> + __u64 len;
> +};
> +
> #endif /* _UAPI_LINUX_F2FS_H */
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to donate pages
2025-01-14 6:34 ` [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to donate pages Chao Yu
@ 2025-01-14 17:15 ` Jaegeuk Kim
2025-01-15 2:12 ` Chao Yu
0 siblings, 1 reply; 12+ messages in thread
From: Jaegeuk Kim @ 2025-01-14 17:15 UTC (permalink / raw)
To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel
On 01/14, Chao Yu wrote:
> On 1/14/25 02:39, Jaegeuk Kim via Linux-f2fs-devel wrote:
> > This patch introduces an inode list to keep track of the page cache ranges that
> > users can donate.
> >
> > #define F2FS_IOC_DONATE_RANGE _IOW(F2FS_IOCTL_MAGIC, 27, \
> > struct f2fs_donate_range)
> > struct f2fs_donate_range {
> > __u64 start;
> > __u64 len;
> > };
> >
> > e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);
>
> I guess we need to add documentation for all ioctls including this one, maybe
> later? :)
Yeah, later.
>
> >
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> > fs/f2fs/debug.c | 3 +++
> > fs/f2fs/f2fs.h | 9 +++++++-
> > fs/f2fs/file.c | 48 +++++++++++++++++++++++++++++++++++++++
> > fs/f2fs/inode.c | 14 ++++++++++++
> > fs/f2fs/super.c | 1 +
> > include/uapi/linux/f2fs.h | 7 ++++++
> > 6 files changed, 81 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> > index 468828288a4a..1b099c123670 100644
> > --- a/fs/f2fs/debug.c
> > +++ b/fs/f2fs/debug.c
> > @@ -164,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
> > si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
> > si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
> > si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
> > + si->ndonate_files = sbi->ndirty_inode[DONATE_INODE];
> > si->nquota_files = sbi->nquota_files;
> > si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
> > si->aw_cnt = atomic_read(&sbi->atomic_files);
> > @@ -501,6 +502,8 @@ static int stat_show(struct seq_file *s, void *v)
> > si->compr_inode, si->compr_blocks);
> > seq_printf(s, " - Swapfile Inode: %u\n",
> > si->swapfile_inode);
> > + seq_printf(s, " - Donate Inode: %d\n",
>
> %u instead of %d, since si->ndonate_files is of type unsigned int.
>
> > + si->ndonate_files);
> > seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
> > si->orphans, si->append, si->update);
> > seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 4bfe162eefd3..7ce3e3eab17a 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -850,6 +850,11 @@ struct f2fs_inode_info {
> > #endif
> > struct list_head dirty_list; /* dirty list for dirs and files */
> > struct list_head gdirty_list; /* linked in global dirty list */
> > +
> > + /* linked in global inode list for cache donation */
> > + struct list_head gdonate_list;
> > + loff_t donate_start, donate_end; /* inclusive */
> > +
> > struct task_struct *atomic_write_task; /* store atomic write task */
> > struct extent_tree *extent_tree[NR_EXTENT_CACHES];
> > /* cached extent_tree entry */
> > @@ -1274,6 +1279,7 @@ enum inode_type {
> > DIR_INODE, /* for dirty dir inode */
> > FILE_INODE, /* for dirty regular/symlink inode */
> > DIRTY_META, /* for all dirtied inode metadata */
> > + DONATE_INODE, /* for all inode to donate pages */
> > NR_INODE_TYPE,
> > };
> > @@ -3984,7 +3990,8 @@ struct f2fs_stat_info {
> > unsigned long long allocated_data_blocks;
> > int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
> > int ndirty_data, ndirty_qdata;
> > - unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
> > + unsigned int ndirty_dirs, ndirty_files, ndirty_all;
> > + unsigned int nquota_files, ndonate_files;
> > int nats, dirty_nats, sits, dirty_sits;
> > int free_nids, avail_nids, alloc_nids;
> > int total_count, utilization;
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 9980d17ef9f5..d6dea6258c2d 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -2493,6 +2493,51 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> > return ret;
> > }
> > +static int f2fs_ioc_donate_range(struct file *filp, unsigned long arg)
> > +{
> > + struct inode *inode = file_inode(filp);
> > + struct mnt_idmap *idmap = file_mnt_idmap(filp);
> > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > + struct f2fs_donate_range range;
> > + int ret;
> > +
> > + if (copy_from_user(&range, (struct f2fs_donate_range __user *)arg,
> > + sizeof(range)))
> > + return -EFAULT;
>
> What about doing a sanity check on the donate range here, in order to avoid
> overflow during the fi->donate_end calculation?
>
> F2FS_I(inode)->donate_end = range.start + range.len - 1;
>
> > +
> > + if (!inode_owner_or_capable(idmap, inode))
> > + return -EACCES;
> > +
> > + if (!S_ISREG(inode->i_mode))
> > + return -EINVAL;
> > +
> > + ret = mnt_want_write_file(filp);
> > + if (ret)
> > + return ret;
> > +
> > + inode_lock(inode);
> > +
> > + if (f2fs_is_atomic_file(inode))
> > + goto out;
> > +
> > + spin_lock(&sbi->inode_lock[DONATE_INODE]);
> > + if (list_empty(&F2FS_I(inode)->gdonate_list)) {
> > + list_add_tail(&F2FS_I(inode)->gdonate_list,
> > + &sbi->inode_list[DONATE_INODE]);
> > + stat_inc_dirty_inode(sbi, DONATE_INODE);
> > + } else {
> > + list_move_tail(&F2FS_I(inode)->gdonate_list,
> > + &sbi->inode_list[DONATE_INODE]);
> > + }
> > + F2FS_I(inode)->donate_start = range.start;
> > + F2FS_I(inode)->donate_end = range.start + range.len - 1;
> > + spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> > +out:
> > + inode_unlock(inode);
> > + mnt_drop_write_file(filp);
> > + return ret;
> > +}
> > +
> > static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
> > {
> > struct inode *inode = file_inode(filp);
> > @@ -4522,6 +4567,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> > return -EOPNOTSUPP;
> > case F2FS_IOC_SHUTDOWN:
> > return f2fs_ioc_shutdown(filp, arg);
> > + case F2FS_IOC_DONATE_RANGE:
> > + return f2fs_ioc_donate_range(filp, arg);
> > case FITRIM:
> > return f2fs_ioc_fitrim(filp, arg);
> > case FS_IOC_SET_ENCRYPTION_POLICY:
> > @@ -5273,6 +5320,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> > case F2FS_IOC_RELEASE_VOLATILE_WRITE:
> > case F2FS_IOC_ABORT_ATOMIC_WRITE:
> > case F2FS_IOC_SHUTDOWN:
> > + case F2FS_IOC_DONATE_RANGE:
> > case FITRIM:
> > case FS_IOC_SET_ENCRYPTION_POLICY:
> > case FS_IOC_GET_ENCRYPTION_PWSALT:
> > diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> > index 7de33da8b3ea..e38dc5fe2f2e 100644
> > --- a/fs/f2fs/inode.c
> > +++ b/fs/f2fs/inode.c
> > @@ -804,6 +804,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
> > return 0;
> > }
> > +static void f2fs_remove_donate_inode(struct inode *inode)
> > +{
> > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > +
> > + if (list_empty(&F2FS_I(inode)->gdonate_list))
>
> Would it be safer to access gdonate_list while holding inode_lock[DONATE_INODE]?
It's unnecessary as this is called from evict_inode.
>
> Thanks,
>
> > + return;
> > +
> > + spin_lock(&sbi->inode_lock[DONATE_INODE]);
> > + list_del_init(&F2FS_I(inode)->gdonate_list);
> > + stat_dec_dirty_inode(sbi, DONATE_INODE);
> > + spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> > +}
> > +
> > /*
> > * Called at the last iput() if i_nlink is zero
> > */
> > @@ -838,6 +851,7 @@ void f2fs_evict_inode(struct inode *inode)
> > f2fs_bug_on(sbi, get_dirty_pages(inode));
> > f2fs_remove_dirty_inode(inode);
> > + f2fs_remove_donate_inode(inode);
> > if (!IS_DEVICE_ALIASING(inode))
> > f2fs_destroy_extent_tree(inode);
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index fc7d463dee15..ef639a6d82e5 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -1441,6 +1441,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
> > spin_lock_init(&fi->i_size_lock);
> > INIT_LIST_HEAD(&fi->dirty_list);
> > INIT_LIST_HEAD(&fi->gdirty_list);
> > + INIT_LIST_HEAD(&fi->gdonate_list);
> > init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
> > init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
> > init_f2fs_rwsem(&fi->i_xattr_sem);
> > diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
> > index f7aaf8d23e20..cd38a7c166e6 100644
> > --- a/include/uapi/linux/f2fs.h
> > +++ b/include/uapi/linux/f2fs.h
> > @@ -44,6 +44,8 @@
> > #define F2FS_IOC_COMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 24)
> > #define F2FS_IOC_START_ATOMIC_REPLACE _IO(F2FS_IOCTL_MAGIC, 25)
> > #define F2FS_IOC_GET_DEV_ALIAS_FILE _IOR(F2FS_IOCTL_MAGIC, 26, __u32)
> > +#define F2FS_IOC_DONATE_RANGE _IOW(F2FS_IOCTL_MAGIC, 27, \
> > + struct f2fs_donate_range)
> > /*
> > * should be same as XFS_IOC_GOINGDOWN.
> > @@ -97,4 +99,9 @@ struct f2fs_comp_option {
> > __u8 log_cluster_size;
> > };
> > +struct f2fs_donate_range {
> > + __u64 start;
> > + __u64 len;
> > +};
> > +
> > #endif /* _UAPI_LINUX_F2FS_H */
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to donate pages
2025-01-14 17:15 ` Jaegeuk Kim
@ 2025-01-15 2:12 ` Chao Yu
0 siblings, 0 replies; 12+ messages in thread
From: Chao Yu @ 2025-01-15 2:12 UTC (permalink / raw)
To: Jaegeuk Kim; +Cc: chao, linux-kernel, linux-f2fs-devel
On 1/15/25 01:15, Jaegeuk Kim wrote:
> On 01/14, Chao Yu wrote:
>> On 1/14/25 02:39, Jaegeuk Kim via Linux-f2fs-devel wrote:
>>> This patch introduces an inode list to keep track of the page cache ranges that
>>> users can donate.
>>>
>>> #define F2FS_IOC_DONATE_RANGE _IOW(F2FS_IOCTL_MAGIC, 27, \
>>> struct f2fs_donate_range)
>>> struct f2fs_donate_range {
>>> __u64 start;
>>> __u64 len;
>>> };
>>>
>>> e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);
>>
>> I guess we need to add documentation for all ioctls including this one, maybe
>> later? :)
>
> Yeah, later.
>
>>
>>>
>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>> ---
>>> fs/f2fs/debug.c | 3 +++
>>> fs/f2fs/f2fs.h | 9 +++++++-
>>> fs/f2fs/file.c | 48 +++++++++++++++++++++++++++++++++++++++
>>> fs/f2fs/inode.c | 14 ++++++++++++
>>> fs/f2fs/super.c | 1 +
>>> include/uapi/linux/f2fs.h | 7 ++++++
>>> 6 files changed, 81 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>>> index 468828288a4a..1b099c123670 100644
>>> --- a/fs/f2fs/debug.c
>>> +++ b/fs/f2fs/debug.c
>>> @@ -164,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>>> si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
>>> si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
>>> si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
>>> + si->ndonate_files = sbi->ndirty_inode[DONATE_INODE];
>>> si->nquota_files = sbi->nquota_files;
>>> si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
>>> si->aw_cnt = atomic_read(&sbi->atomic_files);
>>> @@ -501,6 +502,8 @@ static int stat_show(struct seq_file *s, void *v)
>>> si->compr_inode, si->compr_blocks);
>>> seq_printf(s, " - Swapfile Inode: %u\n",
>>> si->swapfile_inode);
>>> + seq_printf(s, " - Donate Inode: %d\n",
>>
>> %u instead of %d, since si->ndonate_files is of type unsigned int.
>>
>>> + si->ndonate_files);
>>> seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
>>> si->orphans, si->append, si->update);
>>> seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index 4bfe162eefd3..7ce3e3eab17a 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -850,6 +850,11 @@ struct f2fs_inode_info {
>>> #endif
>>> struct list_head dirty_list; /* dirty list for dirs and files */
>>> struct list_head gdirty_list; /* linked in global dirty list */
>>> +
>>> + /* linked in global inode list for cache donation */
>>> + struct list_head gdonate_list;
>>> + loff_t donate_start, donate_end; /* inclusive */
>>> +
>>> struct task_struct *atomic_write_task; /* store atomic write task */
>>> struct extent_tree *extent_tree[NR_EXTENT_CACHES];
>>> /* cached extent_tree entry */
>>> @@ -1274,6 +1279,7 @@ enum inode_type {
>>> DIR_INODE, /* for dirty dir inode */
>>> FILE_INODE, /* for dirty regular/symlink inode */
>>> DIRTY_META, /* for all dirtied inode metadata */
>>> + DONATE_INODE, /* for all inode to donate pages */
>>> NR_INODE_TYPE,
>>> };
>>> @@ -3984,7 +3990,8 @@ struct f2fs_stat_info {
>>> unsigned long long allocated_data_blocks;
>>> int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
>>> int ndirty_data, ndirty_qdata;
>>> - unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
>>> + unsigned int ndirty_dirs, ndirty_files, ndirty_all;
>>> + unsigned int nquota_files, ndonate_files;
>>> int nats, dirty_nats, sits, dirty_sits;
>>> int free_nids, avail_nids, alloc_nids;
>>> int total_count, utilization;
>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>> index 9980d17ef9f5..d6dea6258c2d 100644
>>> --- a/fs/f2fs/file.c
>>> +++ b/fs/f2fs/file.c
>>> @@ -2493,6 +2493,51 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>> return ret;
>>> }
>>> +static int f2fs_ioc_donate_range(struct file *filp, unsigned long arg)
>>> +{
>>> + struct inode *inode = file_inode(filp);
>>> + struct mnt_idmap *idmap = file_mnt_idmap(filp);
>>> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>> + struct f2fs_donate_range range;
>>> + int ret;
>>> +
>>> + if (copy_from_user(&range, (struct f2fs_donate_range __user *)arg,
>>> + sizeof(range)))
>>> + return -EFAULT;
>>
>> What about doing a sanity check on the donate range here, in order to avoid
>> overflow during the fi->donate_end calculation?
>>
>> F2FS_I(inode)->donate_end = range.start + range.len - 1;
>>
>>> +
>>> + if (!inode_owner_or_capable(idmap, inode))
>>> + return -EACCES;
>>> +
>>> + if (!S_ISREG(inode->i_mode))
>>> + return -EINVAL;
>>> +
>>> + ret = mnt_want_write_file(filp);
>>> + if (ret)
>>> + return ret;
>>> +
>>> + inode_lock(inode);
>>> +
>>> + if (f2fs_is_atomic_file(inode))
>>> + goto out;
>>> +
>>> + spin_lock(&sbi->inode_lock[DONATE_INODE]);
>>> + if (list_empty(&F2FS_I(inode)->gdonate_list)) {
>>> + list_add_tail(&F2FS_I(inode)->gdonate_list,
>>> + &sbi->inode_list[DONATE_INODE]);
>>> + stat_inc_dirty_inode(sbi, DONATE_INODE);
>>> + } else {
>>> + list_move_tail(&F2FS_I(inode)->gdonate_list,
>>> + &sbi->inode_list[DONATE_INODE]);
>>> + }
>>> + F2FS_I(inode)->donate_start = range.start;
>>> + F2FS_I(inode)->donate_end = range.start + range.len - 1;
>>> + spin_unlock(&sbi->inode_lock[DONATE_INODE]);
>>> +out:
>>> + inode_unlock(inode);
>>> + mnt_drop_write_file(filp);
>>> + return ret;
>>> +}
>>> +
>>> static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
>>> {
>>> struct inode *inode = file_inode(filp);
>>> @@ -4522,6 +4567,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>>> return -EOPNOTSUPP;
>>> case F2FS_IOC_SHUTDOWN:
>>> return f2fs_ioc_shutdown(filp, arg);
>>> + case F2FS_IOC_DONATE_RANGE:
>>> + return f2fs_ioc_donate_range(filp, arg);
>>> case FITRIM:
>>> return f2fs_ioc_fitrim(filp, arg);
>>> case FS_IOC_SET_ENCRYPTION_POLICY:
>>> @@ -5273,6 +5320,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>>> case F2FS_IOC_RELEASE_VOLATILE_WRITE:
>>> case F2FS_IOC_ABORT_ATOMIC_WRITE:
>>> case F2FS_IOC_SHUTDOWN:
>>> + case F2FS_IOC_DONATE_RANGE:
>>> case FITRIM:
>>> case FS_IOC_SET_ENCRYPTION_POLICY:
>>> case FS_IOC_GET_ENCRYPTION_PWSALT:
>>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>>> index 7de33da8b3ea..e38dc5fe2f2e 100644
>>> --- a/fs/f2fs/inode.c
>>> +++ b/fs/f2fs/inode.c
>>> @@ -804,6 +804,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
>>> return 0;
>>> }
>>> +static void f2fs_remove_donate_inode(struct inode *inode)
>>> +{
>>> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>> +
>>> + if (list_empty(&F2FS_I(inode)->gdonate_list))
>>
>> Would it be safer to access gdonate_list while holding inode_lock[DONATE_INODE]?
>
> It's unnecessary as this is called from evict_inode.
I was just concerned about the case where fi->gdonate_list's prev and next pointers
can be updated concurrently due to insertion or deletion of an adjacent entry.
No risk now, as I checked. :)
Thanks,
>
>>
>> Thanks,
>>
>>> + return;
>>> +
>>> + spin_lock(&sbi->inode_lock[DONATE_INODE]);
>>> + list_del_init(&F2FS_I(inode)->gdonate_list);
>>> + stat_dec_dirty_inode(sbi, DONATE_INODE);
>>> + spin_unlock(&sbi->inode_lock[DONATE_INODE]);
>>> +}
>>> +
>>> /*
>>> * Called at the last iput() if i_nlink is zero
>>> */
>>> @@ -838,6 +851,7 @@ void f2fs_evict_inode(struct inode *inode)
>>> f2fs_bug_on(sbi, get_dirty_pages(inode));
>>> f2fs_remove_dirty_inode(inode);
>>> + f2fs_remove_donate_inode(inode);
>>> if (!IS_DEVICE_ALIASING(inode))
>>> f2fs_destroy_extent_tree(inode);
>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>> index fc7d463dee15..ef639a6d82e5 100644
>>> --- a/fs/f2fs/super.c
>>> +++ b/fs/f2fs/super.c
>>> @@ -1441,6 +1441,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
>>> spin_lock_init(&fi->i_size_lock);
>>> INIT_LIST_HEAD(&fi->dirty_list);
>>> INIT_LIST_HEAD(&fi->gdirty_list);
>>> + INIT_LIST_HEAD(&fi->gdonate_list);
>>> init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
>>> init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
>>> init_f2fs_rwsem(&fi->i_xattr_sem);
>>> diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
>>> index f7aaf8d23e20..cd38a7c166e6 100644
>>> --- a/include/uapi/linux/f2fs.h
>>> +++ b/include/uapi/linux/f2fs.h
>>> @@ -44,6 +44,8 @@
>>> #define F2FS_IOC_COMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 24)
>>> #define F2FS_IOC_START_ATOMIC_REPLACE _IO(F2FS_IOCTL_MAGIC, 25)
>>> #define F2FS_IOC_GET_DEV_ALIAS_FILE _IOR(F2FS_IOCTL_MAGIC, 26, __u32)
>>> +#define F2FS_IOC_DONATE_RANGE _IOW(F2FS_IOCTL_MAGIC, 27, \
>>> + struct f2fs_donate_range)
>>> /*
>>> * should be same as XFS_IOC_GOINGDOWN.
>>> @@ -97,4 +99,9 @@ struct f2fs_comp_option {
>>> __u8 log_cluster_size;
>>> };
>>> +struct f2fs_donate_range {
>>> + __u64 start;
>>> + __u64 len;
>>> +};
>>> +
>>> #endif /* _UAPI_LINUX_F2FS_H */
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/2 v2] f2fs: register inodes which is able to donate pages
2025-01-13 18:39 [PATCH 1/2] f2fs: register inodes which is able to donate pages Jaegeuk Kim
2025-01-13 18:39 ` [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages Jaegeuk Kim
2025-01-14 6:34 ` [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to donate pages Chao Yu
@ 2025-01-14 17:20 ` Jaegeuk Kim
2025-01-14 21:16 ` [PATCH 1/2] " Eric Biggers
3 siblings, 0 replies; 12+ messages in thread
From: Jaegeuk Kim @ 2025-01-14 17:20 UTC (permalink / raw)
To: linux-kernel, linux-f2fs-devel
This patch introduces an inode list to keep track of the page cache ranges that
users can donate.
#define F2FS_IOC_DONATE_RANGE _IOW(F2FS_IOCTL_MAGIC, 27, \
struct f2fs_donate_range)
struct f2fs_donate_range {
__u64 start;
__u64 len;
};
e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Change log from v1:
- change %u print format
- check range boundary
fs/f2fs/debug.c | 3 +++
fs/f2fs/f2fs.h | 9 ++++++-
fs/f2fs/file.c | 52 +++++++++++++++++++++++++++++++++++++++
fs/f2fs/inode.c | 14 +++++++++++
fs/f2fs/super.c | 1 +
include/uapi/linux/f2fs.h | 7 ++++++
6 files changed, 85 insertions(+), 1 deletion(-)
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 468828288a4a..f7aea4dc9565 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -164,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
+ si->ndonate_files = sbi->ndirty_inode[DONATE_INODE];
si->nquota_files = sbi->nquota_files;
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->aw_cnt = atomic_read(&sbi->atomic_files);
@@ -501,6 +502,8 @@ static int stat_show(struct seq_file *s, void *v)
si->compr_inode, si->compr_blocks);
seq_printf(s, " - Swapfile Inode: %u\n",
si->swapfile_inode);
+ seq_printf(s, " - Donate Inode: %u\n",
+ si->ndonate_files);
seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
si->orphans, si->append, si->update);
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4bfe162eefd3..7ce3e3eab17a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -850,6 +850,11 @@ struct f2fs_inode_info {
#endif
struct list_head dirty_list; /* dirty list for dirs and files */
struct list_head gdirty_list; /* linked in global dirty list */
+
+ /* linked in global inode list for cache donation */
+ struct list_head gdonate_list;
+ loff_t donate_start, donate_end; /* inclusive */
+
struct task_struct *atomic_write_task; /* store atomic write task */
struct extent_tree *extent_tree[NR_EXTENT_CACHES];
/* cached extent_tree entry */
@@ -1274,6 +1279,7 @@ enum inode_type {
DIR_INODE, /* for dirty dir inode */
FILE_INODE, /* for dirty regular/symlink inode */
DIRTY_META, /* for all dirtied inode metadata */
+ DONATE_INODE, /* for all inode to donate pages */
NR_INODE_TYPE,
};
@@ -3984,7 +3990,8 @@ struct f2fs_stat_info {
unsigned long long allocated_data_blocks;
int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
int ndirty_data, ndirty_qdata;
- unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
+ unsigned int ndirty_dirs, ndirty_files, ndirty_all;
+ unsigned int nquota_files, ndonate_files;
int nats, dirty_nats, sits, dirty_sits;
int free_nids, avail_nids, alloc_nids;
int total_count, utilization;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 9980d17ef9f5..eb44999bb079 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2493,6 +2493,55 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
return ret;
}
+static int f2fs_ioc_donate_range(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct mnt_idmap *idmap = file_mnt_idmap(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_donate_range range;
+ int ret;
+
+ if (copy_from_user(&range, (struct f2fs_donate_range __user *)arg,
+ sizeof(range)))
+ return -EFAULT;
+
+ if (!inode_owner_or_capable(idmap, inode))
+ return -EACCES;
+
+ if (!S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ if (unlikely((range.start + range.len) >> PAGE_SHIFT >
+ max_file_blocks(inode)))
+ return -EINVAL;
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ inode_lock(inode);
+
+ if (f2fs_is_atomic_file(inode))
+ goto out;
+
+ spin_lock(&sbi->inode_lock[DONATE_INODE]);
+ if (list_empty(&F2FS_I(inode)->gdonate_list)) {
+ list_add_tail(&F2FS_I(inode)->gdonate_list,
+ &sbi->inode_list[DONATE_INODE]);
+ stat_inc_dirty_inode(sbi, DONATE_INODE);
+ } else {
+ list_move_tail(&F2FS_I(inode)->gdonate_list,
+ &sbi->inode_list[DONATE_INODE]);
+ }
+ F2FS_I(inode)->donate_start = range.start;
+ F2FS_I(inode)->donate_end = range.start + range.len - 1;
+ spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+out:
+ inode_unlock(inode);
+ mnt_drop_write_file(filp);
+ return ret;
+}
+
static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -4522,6 +4571,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return -EOPNOTSUPP;
case F2FS_IOC_SHUTDOWN:
return f2fs_ioc_shutdown(filp, arg);
+ case F2FS_IOC_DONATE_RANGE:
+ return f2fs_ioc_donate_range(filp, arg);
case FITRIM:
return f2fs_ioc_fitrim(filp, arg);
case FS_IOC_SET_ENCRYPTION_POLICY:
@@ -5273,6 +5324,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC_RELEASE_VOLATILE_WRITE:
case F2FS_IOC_ABORT_ATOMIC_WRITE:
case F2FS_IOC_SHUTDOWN:
+ case F2FS_IOC_DONATE_RANGE:
case FITRIM:
case FS_IOC_SET_ENCRYPTION_POLICY:
case FS_IOC_GET_ENCRYPTION_PWSALT:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 7de33da8b3ea..e38dc5fe2f2e 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -804,6 +804,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
return 0;
}
+static void f2fs_remove_donate_inode(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ if (list_empty(&F2FS_I(inode)->gdonate_list))
+ return;
+
+ spin_lock(&sbi->inode_lock[DONATE_INODE]);
+ list_del_init(&F2FS_I(inode)->gdonate_list);
+ stat_dec_dirty_inode(sbi, DONATE_INODE);
+ spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+}
+
/*
* Called at the last iput() if i_nlink is zero
*/
@@ -838,6 +851,7 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_bug_on(sbi, get_dirty_pages(inode));
f2fs_remove_dirty_inode(inode);
+ f2fs_remove_donate_inode(inode);
if (!IS_DEVICE_ALIASING(inode))
f2fs_destroy_extent_tree(inode);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index fc7d463dee15..ef639a6d82e5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1441,6 +1441,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
spin_lock_init(&fi->i_size_lock);
INIT_LIST_HEAD(&fi->dirty_list);
INIT_LIST_HEAD(&fi->gdirty_list);
+ INIT_LIST_HEAD(&fi->gdonate_list);
init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
init_f2fs_rwsem(&fi->i_xattr_sem);
diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
index f7aaf8d23e20..cd38a7c166e6 100644
--- a/include/uapi/linux/f2fs.h
+++ b/include/uapi/linux/f2fs.h
@@ -44,6 +44,8 @@
#define F2FS_IOC_COMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 24)
#define F2FS_IOC_START_ATOMIC_REPLACE _IO(F2FS_IOCTL_MAGIC, 25)
#define F2FS_IOC_GET_DEV_ALIAS_FILE _IOR(F2FS_IOCTL_MAGIC, 26, __u32)
+#define F2FS_IOC_DONATE_RANGE _IOW(F2FS_IOCTL_MAGIC, 27, \
+ struct f2fs_donate_range)
/*
* should be same as XFS_IOC_GOINGDOWN.
@@ -97,4 +99,9 @@ struct f2fs_comp_option {
__u8 log_cluster_size;
};
+struct f2fs_donate_range {
+ __u64 start;
+ __u64 len;
+};
+
#endif /* _UAPI_LINUX_F2FS_H */
--
2.47.1.688.g23fc6f90ad-goog
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [PATCH 1/2] f2fs: register inodes which is able to donate pages
2025-01-13 18:39 [PATCH 1/2] f2fs: register inodes which is able to donate pages Jaegeuk Kim
` (2 preceding siblings ...)
2025-01-14 17:20 ` [PATCH 1/2 v2] " Jaegeuk Kim
@ 2025-01-14 21:16 ` Eric Biggers
3 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2025-01-14 21:16 UTC (permalink / raw)
To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel
On Mon, Jan 13, 2025 at 06:39:32PM +0000, Jaegeuk Kim via Linux-f2fs-devel wrote:
> This patch introduces an inode list to keep track of the page cache ranges that
> users can donate.
>
> #define F2FS_IOC_DONATE_RANGE _IOW(F2FS_IOCTL_MAGIC, 27, \
> struct f2fs_donate_range)
> struct f2fs_donate_range {
> __u64 start;
> __u64 len;
> };
>
> e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);
>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
> fs/f2fs/debug.c | 3 +++
> fs/f2fs/f2fs.h | 9 +++++++-
> fs/f2fs/file.c | 48 +++++++++++++++++++++++++++++++++++++++
> fs/f2fs/inode.c | 14 ++++++++++++
> fs/f2fs/super.c | 1 +
> include/uapi/linux/f2fs.h | 7 ++++++
> 6 files changed, 81 insertions(+), 1 deletion(-)
Missing a rationale, documentation, tests, and fuzzing.
- Eric
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 2/2] f2fs: add a sysfs entry to request donate file-backed pages
2025-01-31 22:27 [PATCH 0/2 v8] add ioctl/sysfs to donate file-backed pages Jaegeuk Kim
@ 2025-01-31 22:27 ` Jaegeuk Kim
2025-02-07 16:28 ` [PATCH 2/2 v2] " Jaegeuk Kim
0 siblings, 1 reply; 12+ messages in thread
From: Jaegeuk Kim @ 2025-01-31 22:27 UTC (permalink / raw)
To: linux-kernel, linux-f2fs-devel; +Cc: Jaegeuk Kim
1. ioctl(fd1, F2FS_IOC_DONATE_RANGE, {0,3});
2. ioctl(fd2, F2FS_IOC_DONATE_RANGE, {1,2});
3. ioctl(fd3, F2FS_IOC_DONATE_RANGE, {3,1});
4. echo 1024 > /sys/fs/f2fs/tuning/reclaim_caches_kb
This gives a way to reclaim file-backed pages: it iterates over all f2fs mounts
until 1MB of the page cache ranges registered by #1, #2, and #3 has been reclaimed.
5. cat /sys/fs/f2fs/tuning/reclaim_caches_kb
-> gives total number of registered file ranges.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Documentation/ABI/testing/sysfs-fs-f2fs | 7 ++
fs/f2fs/f2fs.h | 2 +
fs/f2fs/shrinker.c | 90 +++++++++++++++++++++++++
fs/f2fs/sysfs.c | 63 +++++++++++++++++
4 files changed, 162 insertions(+)
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 3e1630c70d8a..81deae2af84d 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -828,3 +828,10 @@ Date: November 2024
Contact: "Chao Yu" <chao@kernel.org>
Description: It controls max read extent count for per-inode, the value of threshold
is 10240 by default.
+
+What: /sys/fs/f2fs/tuning/reclaim_caches_kb
+Date: February 2025
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: It reclaims the given number of KBs of file-backed pages registered by
+ ioctl(F2FS_IOC_DONATE_RANGE).
+ For example, writing N tries to drop N KBs of cached pages in LRU order.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 805585a7d2b6..bd0d8138b71d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4241,6 +4241,8 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
struct shrink_control *sc);
unsigned long f2fs_shrink_scan(struct shrinker *shrink,
struct shrink_control *sc);
+unsigned int f2fs_donate_files(void);
+void f2fs_reclaim_caches(unsigned int reclaim_caches_kb);
void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 83d6fb97dcae..45efff635d8e 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -130,6 +130,96 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
return freed;
}
+unsigned int f2fs_donate_files(void)
+{
+ struct f2fs_sb_info *sbi;
+ struct list_head *p;
+ unsigned int donate_files = 0;
+
+ spin_lock(&f2fs_list_lock);
+ p = f2fs_list.next;
+ while (p != &f2fs_list) {
+ sbi = list_entry(p, struct f2fs_sb_info, s_list);
+
+ /* stop f2fs_put_super */
+ if (!mutex_trylock(&sbi->umount_mutex)) {
+ p = p->next;
+ continue;
+ }
+ spin_unlock(&f2fs_list_lock);
+
+ donate_files += sbi->donate_files;
+
+ spin_lock(&f2fs_list_lock);
+ p = p->next;
+ mutex_unlock(&sbi->umount_mutex);
+ }
+ spin_unlock(&f2fs_list_lock);
+
+ return donate_files;
+}
+
+static unsigned int do_reclaim_caches(struct f2fs_sb_info *sbi,
+ unsigned int reclaim_caches_kb)
+{
+ struct inode *inode;
+ struct f2fs_inode_info *fi;
+ unsigned int nfiles = sbi->donate_files;
+ pgoff_t npages = reclaim_caches_kb >> (PAGE_SHIFT - 10);
+
+ while (npages && nfiles--) {
+ pgoff_t len;
+
+ spin_lock(&sbi->inode_lock[DONATE_INODE]);
+ if (list_empty(&sbi->inode_list[DONATE_INODE])) {
+ spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+ break;
+ }
+ fi = list_first_entry(&sbi->inode_list[DONATE_INODE],
+ struct f2fs_inode_info, gdonate_list);
+ list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]);
+ inode = igrab(&fi->vfs_inode);
+ spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+
+ if (!inode)
+ continue;
+
+ len = fi->donate_end - fi->donate_start + 1;
+ npages = npages < len ? 0 : npages - len;
+ invalidate_inode_pages2_range(inode->i_mapping,
+ fi->donate_start, fi->donate_end);
+ iput(inode);
+ cond_resched();
+ }
+ return npages << (PAGE_SHIFT - 10);
+}
+
+void f2fs_reclaim_caches(unsigned int reclaim_caches_kb)
+{
+ struct f2fs_sb_info *sbi;
+ struct list_head *p;
+
+ spin_lock(&f2fs_list_lock);
+ p = f2fs_list.next;
+ while (p != &f2fs_list && reclaim_caches_kb) {
+ sbi = list_entry(p, struct f2fs_sb_info, s_list);
+
+ /* stop f2fs_put_super */
+ if (!mutex_trylock(&sbi->umount_mutex)) {
+ p = p->next;
+ continue;
+ }
+ spin_unlock(&f2fs_list_lock);
+
+ reclaim_caches_kb = do_reclaim_caches(sbi, reclaim_caches_kb);
+
+ spin_lock(&f2fs_list_lock);
+ p = p->next;
+ mutex_unlock(&sbi->umount_mutex);
+ }
+ spin_unlock(&f2fs_list_lock);
+}
+
void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
{
spin_lock(&f2fs_list_lock);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 4bd7b17a20c8..579226a05a69 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -916,6 +916,39 @@ static struct f2fs_base_attr f2fs_base_attr_##_name = { \
.show = f2fs_feature_show, \
}
+static ssize_t f2fs_tune_show(struct f2fs_base_attr *a, char *buf)
+{
+ unsigned int res;
+
+ if (!strcmp(a->attr.name, "reclaim_caches_kb"))
+ res = f2fs_donate_files();
+
+ return sysfs_emit(buf, "%u\n", res);
+}
+
+static ssize_t f2fs_tune_store(struct f2fs_base_attr *a,
+ const char *buf, size_t count)
+{
+ unsigned long t;
+ int ret;
+
+ ret = kstrtoul(skip_spaces(buf), 0, &t);
+ if (ret)
+ return ret;
+
+ if (!strcmp(a->attr.name, "reclaim_caches_kb"))
+ f2fs_reclaim_caches(t);
+
+ return ret ? ret : count;
+}
+
+#define F2FS_TUNE_RW_ATTR(_name) \
+static struct f2fs_base_attr f2fs_base_attr_##_name = { \
+ .attr = {.name = __stringify(_name), .mode = 0644 }, \
+ .show = f2fs_tune_show, \
+ .store = f2fs_tune_store, \
+}
+
static ssize_t f2fs_sb_feature_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -1368,6 +1401,14 @@ static struct attribute *f2fs_sb_feat_attrs[] = {
};
ATTRIBUTE_GROUPS(f2fs_sb_feat);
+F2FS_TUNE_RW_ATTR(reclaim_caches_kb);
+
+static struct attribute *f2fs_tune_attrs[] = {
+ BASE_ATTR_LIST(reclaim_caches_kb),
+ NULL,
+};
+ATTRIBUTE_GROUPS(f2fs_tune);
+
static const struct sysfs_ops f2fs_attr_ops = {
.show = f2fs_attr_show,
.store = f2fs_attr_store,
@@ -1401,6 +1442,20 @@ static struct kobject f2fs_feat = {
.kset = &f2fs_kset,
};
+static const struct sysfs_ops f2fs_tune_attr_ops = {
+ .show = f2fs_base_attr_show,
+ .store = f2fs_base_attr_store,
+};
+
+static const struct kobj_type f2fs_tune_ktype = {
+ .default_groups = f2fs_tune_groups,
+ .sysfs_ops = &f2fs_tune_attr_ops,
+};
+
+static struct kobject f2fs_tune = {
+ .kset = &f2fs_kset,
+};
+
static ssize_t f2fs_stat_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
@@ -1637,6 +1692,11 @@ int __init f2fs_init_sysfs(void)
if (ret)
goto unregister_out;
+ ret = kobject_init_and_add(&f2fs_tune, &f2fs_tune_ktype,
+ NULL, "tuning");
+ if (ret)
+ goto put_feat;
+
f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
if (!f2fs_proc_root) {
ret = -ENOMEM;
@@ -1645,6 +1705,8 @@ int __init f2fs_init_sysfs(void)
return 0;
put_kobject:
+ kobject_put(&f2fs_tune);
+put_feat:
kobject_put(&f2fs_feat);
unregister_out:
kset_unregister(&f2fs_kset);
@@ -1653,6 +1715,7 @@ int __init f2fs_init_sysfs(void)
void f2fs_exit_sysfs(void)
{
+ kobject_put(&f2fs_tune);
kobject_put(&f2fs_feat);
kset_unregister(&f2fs_kset);
remove_proc_entry("fs/f2fs", NULL);
--
2.48.1.362.g079036d154-goog
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2 v2] f2fs: add a sysfs entry to request donate file-backed pages
2025-01-31 22:27 ` [PATCH 2/2] f2fs: add a sysfs entry to request " Jaegeuk Kim
@ 2025-02-07 16:28 ` Jaegeuk Kim
0 siblings, 0 replies; 12+ messages in thread
From: Jaegeuk Kim @ 2025-02-07 16:28 UTC (permalink / raw)
To: linux-kernel, linux-f2fs-devel
1. ioctl(fd1, F2FS_IOC_DONATE_RANGE, {0,3});
2. ioctl(fd2, F2FS_IOC_DONATE_RANGE, {1,2});
3. ioctl(fd3, F2FS_IOC_DONATE_RANGE, {3,1});
4. echo 1024 > /sys/fs/f2fs/tuning/reclaim_caches_kb
This gives a way to reclaim file-backed pages: it iterates over all f2fs mounts
until 1MB of the page cache ranges registered by #1, #2, and #3 is reclaimed.
5. cat /sys/fs/f2fs/tuning/reclaim_caches_kb
-> gives the total number of registered file ranges.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
From v1:
- Minor revision to clean up the flow.
Documentation/ABI/testing/sysfs-fs-f2fs | 7 ++
fs/f2fs/f2fs.h | 2 +
fs/f2fs/shrinker.c | 90 +++++++++++++++++++++++++
fs/f2fs/sysfs.c | 63 +++++++++++++++++
4 files changed, 162 insertions(+)
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 3e1630c70d8a..81deae2af84d 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -828,3 +828,10 @@ Date: November 2024
Contact: "Chao Yu" <chao@kernel.org>
Description: It controls max read extent count for per-inode, the value of threshold
is 10240 by default.
+
+What: /sys/fs/f2fs/tuning/reclaim_caches_kb
+Date: February 2025
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: It reclaims the given number of KBs of file-backed pages registered by
+ ioctl(F2FS_IOC_DONATE_RANGE).
+ For example, writing N tries to drop N KBs of those pages in LRU order.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 805585a7d2b6..bd0d8138b71d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4241,6 +4241,8 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
struct shrink_control *sc);
unsigned long f2fs_shrink_scan(struct shrinker *shrink,
struct shrink_control *sc);
+unsigned int f2fs_donate_files(void);
+void f2fs_reclaim_caches(unsigned int reclaim_caches_kb);
void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 83d6fb97dcae..45efff635d8e 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -130,6 +130,96 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
return freed;
}
+unsigned int f2fs_donate_files(void)
+{
+ struct f2fs_sb_info *sbi;
+ struct list_head *p;
+ unsigned int donate_files = 0;
+
+ spin_lock(&f2fs_list_lock);
+ p = f2fs_list.next;
+ while (p != &f2fs_list) {
+ sbi = list_entry(p, struct f2fs_sb_info, s_list);
+
+ /* stop f2fs_put_super */
+ if (!mutex_trylock(&sbi->umount_mutex)) {
+ p = p->next;
+ continue;
+ }
+ spin_unlock(&f2fs_list_lock);
+
+ donate_files += sbi->donate_files;
+
+ spin_lock(&f2fs_list_lock);
+ p = p->next;
+ mutex_unlock(&sbi->umount_mutex);
+ }
+ spin_unlock(&f2fs_list_lock);
+
+ return donate_files;
+}
+
+static unsigned int do_reclaim_caches(struct f2fs_sb_info *sbi,
+ unsigned int reclaim_caches_kb)
+{
+ struct inode *inode;
+ struct f2fs_inode_info *fi;
+ unsigned int nfiles = sbi->donate_files;
+ pgoff_t npages = reclaim_caches_kb >> (PAGE_SHIFT - 10);
+
+ while (npages && nfiles--) {
+ pgoff_t len;
+
+ spin_lock(&sbi->inode_lock[DONATE_INODE]);
+ if (list_empty(&sbi->inode_list[DONATE_INODE])) {
+ spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+ break;
+ }
+ fi = list_first_entry(&sbi->inode_list[DONATE_INODE],
+ struct f2fs_inode_info, gdonate_list);
+ list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]);
+ inode = igrab(&fi->vfs_inode);
+ spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+
+ if (!inode)
+ continue;
+
+ len = fi->donate_end - fi->donate_start + 1;
+ npages = npages < len ? 0 : npages - len;
+ invalidate_inode_pages2_range(inode->i_mapping,
+ fi->donate_start, fi->donate_end);
+ iput(inode);
+ cond_resched();
+ }
+ return npages << (PAGE_SHIFT - 10);
+}
+
+void f2fs_reclaim_caches(unsigned int reclaim_caches_kb)
+{
+ struct f2fs_sb_info *sbi;
+ struct list_head *p;
+
+ spin_lock(&f2fs_list_lock);
+ p = f2fs_list.next;
+ while (p != &f2fs_list && reclaim_caches_kb) {
+ sbi = list_entry(p, struct f2fs_sb_info, s_list);
+
+ /* stop f2fs_put_super */
+ if (!mutex_trylock(&sbi->umount_mutex)) {
+ p = p->next;
+ continue;
+ }
+ spin_unlock(&f2fs_list_lock);
+
+ reclaim_caches_kb = do_reclaim_caches(sbi, reclaim_caches_kb);
+
+ spin_lock(&f2fs_list_lock);
+ p = p->next;
+ mutex_unlock(&sbi->umount_mutex);
+ }
+ spin_unlock(&f2fs_list_lock);
+}
+
void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
{
spin_lock(&f2fs_list_lock);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index b419555e1ea7..b27336acf519 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -916,6 +916,39 @@ static struct f2fs_base_attr f2fs_base_attr_##_name = { \
.show = f2fs_feature_show, \
}
+static ssize_t f2fs_tune_show(struct f2fs_base_attr *a, char *buf)
+{
+ unsigned int res = 0;
+
+ if (!strcmp(a->attr.name, "reclaim_caches_kb"))
+ res = f2fs_donate_files();
+
+ return sysfs_emit(buf, "%u\n", res);
+}
+
+static ssize_t f2fs_tune_store(struct f2fs_base_attr *a,
+ const char *buf, size_t count)
+{
+ unsigned long t;
+ int ret;
+
+ ret = kstrtoul(skip_spaces(buf), 0, &t);
+ if (ret)
+ return ret;
+
+ if (!strcmp(a->attr.name, "reclaim_caches_kb"))
+ f2fs_reclaim_caches(t);
+
+ return count;
+}
+
+#define F2FS_TUNE_RW_ATTR(_name) \
+static struct f2fs_base_attr f2fs_base_attr_##_name = { \
+ .attr = {.name = __stringify(_name), .mode = 0644 }, \
+ .show = f2fs_tune_show, \
+ .store = f2fs_tune_store, \
+}
+
static ssize_t f2fs_sb_feature_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -1368,6 +1401,14 @@ static struct attribute *f2fs_sb_feat_attrs[] = {
};
ATTRIBUTE_GROUPS(f2fs_sb_feat);
+F2FS_TUNE_RW_ATTR(reclaim_caches_kb);
+
+static struct attribute *f2fs_tune_attrs[] = {
+ BASE_ATTR_LIST(reclaim_caches_kb),
+ NULL,
+};
+ATTRIBUTE_GROUPS(f2fs_tune);
+
static const struct sysfs_ops f2fs_attr_ops = {
.show = f2fs_attr_show,
.store = f2fs_attr_store,
@@ -1401,6 +1442,20 @@ static struct kobject f2fs_feat = {
.kset = &f2fs_kset,
};
+static const struct sysfs_ops f2fs_tune_attr_ops = {
+ .show = f2fs_base_attr_show,
+ .store = f2fs_base_attr_store,
+};
+
+static const struct kobj_type f2fs_tune_ktype = {
+ .default_groups = f2fs_tune_groups,
+ .sysfs_ops = &f2fs_tune_attr_ops,
+};
+
+static struct kobject f2fs_tune = {
+ .kset = &f2fs_kset,
+};
+
static ssize_t f2fs_stat_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
@@ -1637,6 +1692,11 @@ int __init f2fs_init_sysfs(void)
if (ret)
goto put_kobject;
+ ret = kobject_init_and_add(&f2fs_tune, &f2fs_tune_ktype,
+ NULL, "tuning");
+ if (ret)
+ goto put_kobject;
+
f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
if (!f2fs_proc_root) {
ret = -ENOMEM;
@@ -1644,7 +1704,9 @@ int __init f2fs_init_sysfs(void)
}
return 0;
+
put_kobject:
+ kobject_put(&f2fs_tune);
kobject_put(&f2fs_feat);
kset_unregister(&f2fs_kset);
return ret;
@@ -1652,6 +1714,7 @@ int __init f2fs_init_sysfs(void)
void f2fs_exit_sysfs(void)
{
+ kobject_put(&f2fs_tune);
kobject_put(&f2fs_feat);
kset_unregister(&f2fs_kset);
remove_proc_entry("fs/f2fs", NULL);
--
2.48.1.502.g6dc24dfdaf-goog
^ permalink raw reply related [flat|nested] 12+ messages in thread