From: "Darrick J. Wong" <djwong@kernel.org>
To: Joanne Koong <joannelkoong@gmail.com>
Cc: linux-fsdevel@vger.kernel.org, hch@lst.de, miklos@szeredi.hu,
brauner@kernel.org, anuj20.g@samsung.com, kernel-team@meta.com
Subject: Re: [PATCH v4 2/5] fuse: use iomap for writeback
Date: Fri, 11 Jul 2025 21:41:56 -0700 [thread overview]
Message-ID: <20250712044156.GH2672029@frogsfrogsfrogs> (raw)
In-Reply-To: <20250709221023.2252033-3-joannelkoong@gmail.com>
On Wed, Jul 09, 2025 at 03:10:20PM -0700, Joanne Koong wrote:
> Use iomap for dirty folio writeback in ->writepages().
> This allows for granular dirty writeback of large folios.
>
> Only the dirty portions of the large folio will be written instead of
> having to write out the entire folio. For example if there is a 1 MB
> large folio and only 2 bytes in it are dirty, only the page for those
> dirty bytes will be written out.
>
> .dirty_folio needs to be set to iomap_dirty_folio so that the bitmap
> iomap uses for dirty tracking correctly reflects dirty regions that need
> to be written back.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---
> fs/fuse/file.c | 127 +++++++++++++++++++++++++++++--------------------
> 1 file changed, 76 insertions(+), 51 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index cadad61ef7df..70bbc8f26459 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -1832,7 +1832,7 @@ static void fuse_writepage_finish(struct fuse_writepage_args *wpa)
> * scope of the fi->lock alleviates xarray lock
> * contention and noticeably improves performance.
> */
> - folio_end_writeback(ap->folios[i]);
> + iomap_finish_folio_write(inode, ap->folios[i], 1);
> dec_wb_stat(&bdi->wb, WB_WRITEBACK);
> wb_writeout_inc(&bdi->wb);
> }
> @@ -2019,19 +2019,20 @@ static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
> }
>
> static void fuse_writepage_args_page_fill(struct fuse_writepage_args *wpa, struct folio *folio,
> - uint32_t folio_index)
> + uint32_t folio_index, loff_t offset, unsigned len)
> {
> struct inode *inode = folio->mapping->host;
> struct fuse_args_pages *ap = &wpa->ia.ap;
>
> ap->folios[folio_index] = folio;
> - ap->descs[folio_index].offset = 0;
> - ap->descs[folio_index].length = folio_size(folio);
> + ap->descs[folio_index].offset = offset;
> + ap->descs[folio_index].length = len;
>
> inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
> }
>
> static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio,
> + size_t offset,
> struct fuse_file *ff)
> {
> struct inode *inode = folio->mapping->host;
> @@ -2044,7 +2045,7 @@ static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio
> return NULL;
>
> fuse_writepage_add_to_bucket(fc, wpa);
> - fuse_write_args_fill(&wpa->ia, ff, folio_pos(folio), 0);
> + fuse_write_args_fill(&wpa->ia, ff, folio_pos(folio) + offset, 0);
> wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
> wpa->inode = inode;
> wpa->ia.ff = ff;
> @@ -2070,7 +2071,7 @@ static int fuse_writepage_locked(struct folio *folio)
> if (!ff)
> goto err;
>
> - wpa = fuse_writepage_args_setup(folio, ff);
> + wpa = fuse_writepage_args_setup(folio, 0, ff);
> error = -ENOMEM;
> if (!wpa)
> goto err_writepage_args;
> @@ -2079,7 +2080,7 @@ static int fuse_writepage_locked(struct folio *folio)
> ap->num_folios = 1;
>
> folio_start_writeback(folio);
> - fuse_writepage_args_page_fill(wpa, folio, 0);
> + fuse_writepage_args_page_fill(wpa, folio, 0, 0, folio_size(folio));
>
> spin_lock(&fi->lock);
> list_add_tail(&wpa->queue_entry, &fi->queued_writes);
> @@ -2100,7 +2101,7 @@ struct fuse_fill_wb_data {
> struct fuse_file *ff;
> struct inode *inode;
> unsigned int max_folios;
> - unsigned int nr_pages;
> + unsigned int nr_bytes;
I don't know if fuse servers are ever realistically going to end up with
a large number of 1M folios, but at least in theory iomap is capable of
queuing ~4096 folios into a single writeback context. Does this need to
account for that?
> };
>
> static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
> @@ -2141,22 +2142,29 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
> spin_unlock(&fi->lock);
> }
>
> -static bool fuse_writepage_need_send(struct fuse_conn *fc, struct folio *folio,
> - struct fuse_args_pages *ap,
> +static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
> + unsigned len, struct fuse_args_pages *ap,
> struct fuse_fill_wb_data *data)
> {
> + struct folio *prev_folio;
> + struct fuse_folio_desc prev_desc;
> + loff_t prev_pos;
> +
> WARN_ON(!ap->num_folios);
>
> /* Reached max pages */
> - if (data->nr_pages + folio_nr_pages(folio) > fc->max_pages)
> + if ((data->nr_bytes + len) / PAGE_SIZE > fc->max_pages)
>> PAGE_SHIFT ?
Otherwise this looks decent to me.
--D
> return true;
>
> /* Reached max write bytes */
> - if ((data->nr_pages * PAGE_SIZE) + folio_size(folio) > fc->max_write)
> + if (data->nr_bytes + len > fc->max_write)
> return true;
>
> /* Discontinuity */
> - if (folio_next_index(ap->folios[ap->num_folios - 1]) != folio->index)
> + prev_folio = ap->folios[ap->num_folios - 1];
> + prev_desc = ap->descs[ap->num_folios - 1];
> + prev_pos = folio_pos(prev_folio) + prev_desc.offset + prev_desc.length;
> + if (prev_pos != pos)
> return true;
>
> /* Need to grow the pages array? If so, did the expansion fail? */
> @@ -2166,85 +2174,102 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, struct folio *folio,
> return false;
> }
>
> -static int fuse_writepages_fill(struct folio *folio,
> - struct writeback_control *wbc, void *_data)
> +static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc,
> + struct folio *folio, u64 pos,
> + unsigned len, u64 end_pos)
> {
> - struct fuse_fill_wb_data *data = _data;
> + struct fuse_fill_wb_data *data = wpc->wb_ctx;
> struct fuse_writepage_args *wpa = data->wpa;
> struct fuse_args_pages *ap = &wpa->ia.ap;
> struct inode *inode = data->inode;
> struct fuse_inode *fi = get_fuse_inode(inode);
> struct fuse_conn *fc = get_fuse_conn(inode);
> - int err;
> + loff_t offset = offset_in_folio(folio, pos);
> +
> + WARN_ON_ONCE(!data);
> + /* len will always be page aligned */
> + WARN_ON_ONCE(len & (PAGE_SIZE - 1));
>
> if (!data->ff) {
> - err = -EIO;
> data->ff = fuse_write_file_get(fi);
> if (!data->ff)
> - goto out_unlock;
> + return -EIO;
> }
>
> - if (wpa && fuse_writepage_need_send(fc, folio, ap, data)) {
> + if (wpa && fuse_writepage_need_send(fc, pos, len, ap, data)) {
> fuse_writepages_send(data);
> data->wpa = NULL;
> - data->nr_pages = 0;
> + data->nr_bytes = 0;
> }
>
> if (data->wpa == NULL) {
> - err = -ENOMEM;
> - wpa = fuse_writepage_args_setup(folio, data->ff);
> + wpa = fuse_writepage_args_setup(folio, offset, data->ff);
> if (!wpa)
> - goto out_unlock;
> + return -ENOMEM;
> fuse_file_get(wpa->ia.ff);
> data->max_folios = 1;
> ap = &wpa->ia.ap;
> }
> - folio_start_writeback(folio);
>
> - fuse_writepage_args_page_fill(wpa, folio, ap->num_folios);
> - data->nr_pages += folio_nr_pages(folio);
> + iomap_start_folio_write(inode, folio, 1);
> + fuse_writepage_args_page_fill(wpa, folio, ap->num_folios,
> + offset, len);
> + data->nr_bytes += len;
>
> - err = 0;
> ap->num_folios++;
> if (!data->wpa)
> data->wpa = wpa;
> -out_unlock:
> - folio_unlock(folio);
>
> - return err;
> + return len;
> +}
> +
> +static int fuse_iomap_writeback_submit(struct iomap_writepage_ctx *wpc,
> + int error)
> +{
> + struct fuse_fill_wb_data *data = wpc->wb_ctx;
> +
> + WARN_ON_ONCE(!data);
> +
> + if (data->wpa) {
> + WARN_ON(!data->wpa->ia.ap.num_folios);
> + fuse_writepages_send(data);
> + }
> +
> + if (data->ff)
> + fuse_file_put(data->ff, false);
> +
> + return error;
> }
>
> +static const struct iomap_writeback_ops fuse_writeback_ops = {
> + .writeback_range = fuse_iomap_writeback_range,
> + .writeback_submit = fuse_iomap_writeback_submit,
> +};
> +
> static int fuse_writepages(struct address_space *mapping,
> struct writeback_control *wbc)
> {
> struct inode *inode = mapping->host;
> struct fuse_conn *fc = get_fuse_conn(inode);
> - struct fuse_fill_wb_data data;
> - int err;
> + struct fuse_fill_wb_data data = {
> + .inode = inode,
> + };
> + struct iomap_writepage_ctx wpc = {
> + .inode = inode,
> + .iomap.type = IOMAP_MAPPED,
> + .wbc = wbc,
> + .ops = &fuse_writeback_ops,
> + .wb_ctx = &data,
> + };
>
> - err = -EIO;
> if (fuse_is_bad(inode))
> - goto out;
> + return -EIO;
>
> if (wbc->sync_mode == WB_SYNC_NONE &&
> fc->num_background >= fc->congestion_threshold)
> return 0;
>
> - data.inode = inode;
> - data.wpa = NULL;
> - data.ff = NULL;
> - data.nr_pages = 0;
> -
> - err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
> - if (data.wpa) {
> - WARN_ON(!data.wpa->ia.ap.num_folios);
> - fuse_writepages_send(&data);
> - }
> - if (data.ff)
> - fuse_file_put(data.ff, false);
> -
> -out:
> - return err;
> + return iomap_writepages(&wpc);
> }
>
> static int fuse_launder_folio(struct folio *folio)
> @@ -3104,7 +3129,7 @@ static const struct address_space_operations fuse_file_aops = {
> .readahead = fuse_readahead,
> .writepages = fuse_writepages,
> .launder_folio = fuse_launder_folio,
> - .dirty_folio = filemap_dirty_folio,
> + .dirty_folio = iomap_dirty_folio,
> .release_folio = iomap_release_folio,
> .migrate_folio = filemap_migrate_folio,
> .bmap = fuse_bmap,
> --
> 2.47.1
>
>
next prev parent reply other threads:[~2025-07-12 4:41 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-09 22:10 [PATCH v4 0/5] fuse: use iomap for buffered writes + writeback Joanne Koong
2025-07-09 22:10 ` [PATCH v4 1/5] fuse: use iomap for buffered writes Joanne Koong
2025-07-12 4:46 ` Darrick J. Wong
2025-07-12 6:13 ` Amir Goldstein
2025-07-14 11:40 ` Christoph Hellwig
2025-07-09 22:10 ` [PATCH v4 2/5] fuse: use iomap for writeback Joanne Koong
2025-07-12 4:41 ` Darrick J. Wong [this message]
2025-07-14 21:43 ` Joanne Koong
2025-07-09 22:10 ` [PATCH v4 3/5] fuse: use iomap for folio laundering Joanne Koong
2025-07-09 22:10 ` [PATCH v4 4/5] fuse: hook into iomap for invalidating and checking partial uptodateness Joanne Koong
2025-07-09 22:10 ` [PATCH v4 5/5] fuse: refactor writeback to use iomap_writepage_ctx inode Joanne Koong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250712044156.GH2672029@frogsfrogsfrogs \
--to=djwong@kernel.org \
--cc=anuj20.g@samsung.com \
--cc=brauner@kernel.org \
--cc=hch@lst.de \
--cc=joannelkoong@gmail.com \
--cc=kernel-team@meta.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=miklos@szeredi.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).