From: David Howells <dhowells@redhat.com>
To: Linus Torvalds <torvalds@linux-foundation.org>,
Steve French <smfrench@gmail.com>
Cc: dhowells@redhat.com, Vishal Moola <vishal.moola@gmail.com>,
Shyam Prasad N <nspmangalore@gmail.com>,
Rohith Surabattula <rohiths.msft@gmail.com>,
Tom Talpey <tom@talpey.com>, Stefan Metzmacher <metze@samba.org>,
Paulo Alcantara <pc@cjr.nz>, Jeff Layton <jlayton@kernel.org>,
Matthew Wilcox <willy@infradead.org>,
Marc Dionne <marc.dionne@auristor.com>,
linux-afs@lists.infradead.org, linux-cifs@vger.kernel.org,
linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: cifs test patch to convert to using write_cache_pages()
Date: Thu, 02 Mar 2023 23:36:49 +0000 [thread overview]
Message-ID: <522331.1677800209@warthog.procyon.org.uk> (raw)
In-Reply-To: <20230302231638.521280-1-dhowells@redhat.com>
David Howells <dhowells@redhat.com> wrote:
> And then CIFS. ...
>
> Base + write_cache_pages():
> WRITE: bw=457MiB/s (479MB/s), 114MiB/s-114MiB/s (120MB/s-120MB/s)
> WRITE: bw=449MiB/s (471MB/s), 112MiB/s-113MiB/s (118MB/s-118MB/s)
> WRITE: bw=459MiB/s (482MB/s), 115MiB/s-115MiB/s (120MB/s-121MB/s)
Here's my patch to convert cifs to use write_cache_pages().
David
---
fs/cifs/file.c | 400 ++++++++++++++++-----------------------------------------
1 file changed, 115 insertions(+), 285 deletions(-)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 3d304d4a54d6..04e2466609d9 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2613,140 +2613,35 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
return rc;
}
-/*
- * Extend the region to be written back to include subsequent contiguously
- * dirty pages if possible, but don't sleep while doing so.
- */
-static void cifs_extend_writeback(struct address_space *mapping,
- long *_count,
- loff_t start,
- int max_pages,
- size_t max_len,
- unsigned int *_len)
-{
- struct folio_batch batch;
- struct folio *folio;
- unsigned int psize, nr_pages;
- size_t len = *_len;
- pgoff_t index = (start + len) / PAGE_SIZE;
- bool stop = true;
- unsigned int i;
- XA_STATE(xas, &mapping->i_pages, index);
-
- folio_batch_init(&batch);
-
- do {
- /* Firstly, we gather up a batch of contiguous dirty pages
- * under the RCU read lock - but we can't clear the dirty flags
- * there if any of those pages are mapped.
- */
- rcu_read_lock();
-
- xas_for_each(&xas, folio, ULONG_MAX) {
- stop = true;
- if (xas_retry(&xas, folio))
- continue;
- if (xa_is_value(folio))
- break;
- if (folio_index(folio) != index)
- break;
- if (!folio_try_get_rcu(folio)) {
- xas_reset(&xas);
- continue;
- }
- nr_pages = folio_nr_pages(folio);
- if (nr_pages > max_pages)
- break;
-
- /* Has the page moved or been split? */
- if (unlikely(folio != xas_reload(&xas))) {
- folio_put(folio);
- break;
- }
-
- if (!folio_trylock(folio)) {
- folio_put(folio);
- break;
- }
- if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
- folio_unlock(folio);
- folio_put(folio);
- break;
- }
-
- psize = folio_size(folio);
- len += psize;
- stop = false;
- if (max_pages <= 0 || len >= max_len || *_count <= 0)
- stop = true;
-
- index += nr_pages;
- if (!folio_batch_add(&batch, folio))
- break;
- if (stop)
- break;
- }
-
- if (!stop)
- xas_pause(&xas);
- rcu_read_unlock();
-
- /* Now, if we obtained any pages, we can shift them to being
- * writable and mark them for caching.
- */
- if (!folio_batch_count(&batch))
- break;
-
- for (i = 0; i < folio_batch_count(&batch); i++) {
- folio = batch.folios[i];
- /* The folio should be locked, dirty and not undergoing
- * writeback from the loop above.
- */
- if (!folio_clear_dirty_for_io(folio))
- WARN_ON(1);
- if (folio_start_writeback(folio))
- WARN_ON(1);
-
- *_count -= folio_nr_pages(folio);
- folio_unlock(folio);
- }
-
- folio_batch_release(&batch);
- cond_resched();
- } while (!stop);
-
- *_len = len;
-}
+struct cifs_writepages_context {
+ struct cifs_writedata *wdata;
+ struct TCP_Server_Info *server;
+ struct cifs_credits credits;
+ unsigned long long start;
+ size_t len;
+ size_t wsize;
+ unsigned int xid;
+ bool begun;
+ bool caching;
+};
/*
- * Write back the locked page and any subsequent non-locked dirty pages.
+ * Set up a writeback op.
*/
-static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
- struct writeback_control *wbc,
- struct folio *folio,
- loff_t start, loff_t end)
+static int cifs_writeback_begin(struct address_space *mapping,
+ struct writeback_control *wbc,
+ unsigned long long start,
+ struct cifs_writepages_context *ctx)
{
struct inode *inode = mapping->host;
struct TCP_Server_Info *server;
struct cifs_writedata *wdata;
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
- struct cifs_credits credits_on_stack;
- struct cifs_credits *credits = &credits_on_stack;
struct cifsFileInfo *cfile = NULL;
- unsigned int xid, wsize, len;
- loff_t i_size = i_size_read(inode);
- size_t max_len;
- long count = wbc->nr_to_write;
+ unsigned int wsize, len;
int rc;
- /* The folio should be locked, dirty and not undergoing writeback. */
- if (folio_start_writeback(folio))
- WARN_ON(1);
-
- count -= folio_nr_pages(folio);
- len = folio_size(folio);
-
- xid = get_xid();
+ ctx->xid = get_xid();
server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
@@ -2756,7 +2651,7 @@ static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
}
rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
- &wsize, credits);
+ &wsize, &ctx->credits);
if (rc != 0)
goto err_close;
@@ -2767,56 +2662,60 @@ static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
}
wdata->sync_mode = wbc->sync_mode;
- wdata->offset = folio_pos(folio);
+ wdata->offset = start;
wdata->pid = cfile->pid;
- wdata->credits = credits_on_stack;
+ wdata->credits = ctx->credits;
wdata->cfile = cfile;
wdata->server = server;
- cfile = NULL;
-
- /* Find all consecutive lockable dirty pages, stopping when we find a
- * page that is not immediately lockable, is not dirty or is missing,
- * or we reach the end of the range.
- */
- if (start < i_size) {
- /* Trim the write to the EOF; the extra data is ignored. Also
- * put an upper limit on the size of a single storedata op.
- */
- max_len = wsize;
- max_len = min_t(unsigned long long, max_len, end - start + 1);
- max_len = min_t(unsigned long long, max_len, i_size - start);
-
- if (len < max_len) {
- int max_pages = INT_MAX;
-
-#ifdef CONFIG_CIFS_SMB_DIRECT
- if (server->smbd_conn)
- max_pages = server->smbd_conn->max_frmr_depth;
-#endif
- max_pages -= folio_nr_pages(folio);
+ ctx->wsize = wsize;
+ ctx->server = server;
+ ctx->wdata = wdata;
+ ctx->begun = true;
+ return 0;
- if (max_pages > 0)
- cifs_extend_writeback(mapping, &count, start,
- max_pages, max_len, &len);
- }
- len = min_t(loff_t, len, max_len);
+err_uncredit:
+ add_credits_and_wake_if(server, &ctx->credits, 0);
+err_close:
+ if (cfile)
+ cifsFileInfo_put(cfile);
+err_xid:
+ free_xid(ctx->xid);
+ if (is_retryable_error(rc)) {
+ cifs_pages_write_redirty(inode, start, len);
+ } else {
+ cifs_pages_write_failed(inode, start, len);
+ mapping_set_error(mapping, rc);
}
+ /* Indication to update ctime and mtime as close is deferred */
+ set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
+ return rc;
+}
- wdata->bytes = len;
+/*
+ * Flush a block of pages to the server and the cache.
+ */
+static int cifs_writepages_submit(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct cifs_writepages_context *ctx)
+{
+ struct TCP_Server_Info *server = ctx->server;
+ struct cifs_writedata *wdata = ctx->wdata;
+ unsigned long long i_size = i_size_read(mapping->host);
+ int rc;
- /* We now have a contiguous set of dirty pages, each with writeback
- * set; the first page is still locked at this point, but all the rest
- * have been unlocked.
+ /* We now have a contiguous set of dirty pages, each with
+ * writeback set.
*/
- folio_unlock(folio);
-
- if (start < i_size) {
- iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
- start, len);
+ if (ctx->start < i_size) {
+ if (ctx->len > i_size - ctx->start)
+ ctx->len = i_size - ctx->start;
+ wdata->bytes = ctx->len;
+ iov_iter_xarray(&wdata->iter, ITER_SOURCE,
+ &mapping->i_pages, ctx->start, wdata->bytes);
rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
if (rc)
- goto err_wdata;
+ goto err;
if (wdata->cfile->invalidHandle)
rc = -EAGAIN;
@@ -2827,133 +2726,79 @@ static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
kref_put(&wdata->refcount, cifs_writedata_release);
goto err_close;
}
+
} else {
/* The dirty region was entirely beyond the EOF. */
- cifs_pages_written_back(inode, start, len);
+ cifs_pages_written_back(mapping->host, ctx->start, ctx->len);
rc = 0;
}
-err_wdata:
+err:
kref_put(&wdata->refcount, cifs_writedata_release);
-err_uncredit:
- add_credits_and_wake_if(server, credits, 0);
+ add_credits_and_wake_if(server, &ctx->credits, 0);
err_close:
- if (cfile)
- cifsFileInfo_put(cfile);
-err_xid:
- free_xid(xid);
+ free_xid(ctx->xid);
if (rc == 0) {
- wbc->nr_to_write = count;
- rc = len;
+ rc = 0;
} else if (is_retryable_error(rc)) {
- cifs_pages_write_redirty(inode, start, len);
+ cifs_pages_write_redirty(mapping->host, ctx->start, ctx->len);
} else {
- cifs_pages_write_failed(inode, start, len);
+ cifs_pages_write_failed(mapping->host, ctx->start, ctx->len);
mapping_set_error(mapping, rc);
}
+
/* Indication to update ctime and mtime as close is deferred */
- set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
+ set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(mapping->host)->flags);
+ ctx->wdata = NULL;
+ ctx->begun = false;
return rc;
}
/*
- * write a region of pages back to the server
+ * Add a page to the set and flush when large enough.
*/
-static int cifs_writepages_region(struct address_space *mapping,
- struct writeback_control *wbc,
- loff_t start, loff_t end, loff_t *_next)
+static int cifs_writepages_add_folio(struct folio *folio,
+ struct writeback_control *wbc, void *data)
{
- struct folio_batch fbatch;
- int skips = 0;
-
- folio_batch_init(&fbatch);
- do {
- int nr;
- pgoff_t index = start / PAGE_SIZE;
-
- nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
- PAGECACHE_TAG_DIRTY, &fbatch);
- if (!nr)
- break;
-
- for (int i = 0; i < nr; i++) {
- ssize_t ret;
- struct folio *folio = fbatch.folios[i];
-
-redo_folio:
- start = folio_pos(folio); /* May regress with THPs */
-
- /* At this point we hold neither the i_pages lock nor the
- * page lock: the page may be truncated or invalidated
- * (changing page->mapping to NULL), or even swizzled
- * back from swapper_space to tmpfs file mapping
- */
- if (wbc->sync_mode != WB_SYNC_NONE) {
- ret = folio_lock_killable(folio);
- if (ret < 0)
- goto write_error;
- } else {
- if (!folio_trylock(folio))
- goto skip_write;
- }
-
- if (folio_mapping(folio) != mapping ||
- !folio_test_dirty(folio)) {
- start += folio_size(folio);
- folio_unlock(folio);
- continue;
- }
-
- if (folio_test_writeback(folio) ||
- folio_test_fscache(folio)) {
- folio_unlock(folio);
- if (wbc->sync_mode == WB_SYNC_NONE)
- goto skip_write;
-
- folio_wait_writeback(folio);
-#ifdef CONFIG_CIFS_FSCACHE
- folio_wait_fscache(folio);
-#endif
- goto redo_folio;
- }
-
- if (!folio_clear_dirty_for_io(folio))
- /* We hold the page lock - it should've been dirty. */
- WARN_ON(1);
+ struct cifs_writepages_context *ctx = data;
+ unsigned long long i_size = i_size_read(folio->mapping->host);
+ unsigned long long pos = folio_pos(folio);
+ size_t size = folio_size(folio);
+ int ret;
- ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
- if (ret < 0)
- goto write_error;
+ if (pos < i_size && size > i_size - pos)
+ size = i_size - pos;
- start += ret;
- continue;
-
-write_error:
- folio_batch_release(&fbatch);
- *_next = start;
+ if (ctx->begun) {
+ if (pos == ctx->start + ctx->len &&
+ ctx->len + size <= ctx->wsize)
+ goto add;
+ ret = cifs_writepages_submit(folio->mapping, wbc, ctx);
+ if (ret < 0) {
+ ctx->begun = false;
return ret;
+ }
+ }
-skip_write:
- /*
- * Too many skipped writes, or need to reschedule?
- * Treat it as a write error without an error code.
- */
- if (skips >= 5 || need_resched()) {
- ret = 0;
- goto write_error;
- }
+ ret = cifs_writeback_begin(folio->mapping, wbc, pos, ctx);
+ if (ret < 0)
+ return ret;
- /* Otherwise, just skip that folio and go on to the next */
- skips++;
- start += folio_size(folio);
- continue;
- }
+ ctx->start = folio_pos(folio);
+ ctx->len = 0;
+add:
+ ctx->len += folio_size(folio);
- folio_batch_release(&fbatch);
- cond_resched();
- } while (wbc->nr_to_write > 0);
+ folio_wait_fscache(folio);
+ folio_start_writeback(folio);
+ folio_unlock(folio);
- *_next = start;
+ if (ctx->len >= ctx->wsize) {
+ ret = cifs_writepages_submit(folio->mapping, wbc, ctx);
+ if (ret < 0)
+ return ret;
+ ctx->begun = false;
+ }
return 0;
}
@@ -2963,7 +2808,7 @@ static int cifs_writepages_region(struct address_space *mapping,
static int cifs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- loff_t start, next;
+ struct cifs_writepages_context ctx = {};
int ret;
/* We have to be careful as we can end up racing with setattr()
@@ -2971,26 +2816,11 @@ static int cifs_writepages(struct address_space *mapping,
* to prevent it.
*/
- if (wbc->range_cyclic) {
- start = mapping->writeback_index * PAGE_SIZE;
- ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
- if (ret == 0) {
- mapping->writeback_index = next / PAGE_SIZE;
- if (start > 0 && wbc->nr_to_write > 0) {
- ret = cifs_writepages_region(mapping, wbc, 0,
- start, &next);
- if (ret == 0)
- mapping->writeback_index =
- next / PAGE_SIZE;
- }
- }
- } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
- ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
- if (wbc->nr_to_write > 0 && ret == 0)
- mapping->writeback_index = next / PAGE_SIZE;
- } else {
- ret = cifs_writepages_region(mapping, wbc,
- wbc->range_start, wbc->range_end, &next);
+ ret = write_cache_pages(mapping, wbc, cifs_writepages_add_folio, &ctx);
+ if (ret >= 0 && ctx.begun) {
+ ret = cifs_writepages_submit(mapping, wbc, &ctx);
+ if (ret < 0)
+ return ret;
}
return ret;
next prev parent reply other threads:[~2023-03-02 23:37 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-02 23:16 [PATCH 0/3] smb3, afs: Revert changes to {cifs,afs}_writepages_region() David Howells
2023-03-02 23:16 ` [PATCH 1/3] mm: Add a function to get a single tagged folio from a file David Howells
2023-03-02 23:21 ` Matthew Wilcox
2023-03-02 23:16 ` [PATCH 2/3] afs: Partially revert and use filemap_get_folio_tag() David Howells
2023-03-02 23:16 ` [PATCH 3/3] cifs: " David Howells
2023-03-02 23:20 ` [PATCH 0/3] smb3, afs: Revert changes to {cifs,afs}_writepages_region() David Howells
2023-03-02 23:23 ` Test patch to remove per-page dirty region tracking from afs David Howells
2023-03-02 23:29 ` Test patch to make afs use write_cache_pages() David Howells
2023-03-02 23:32 ` Test patch to make afs use its own version of write_cache_pages() David Howells
2023-03-02 23:36 ` David Howells [this message]
2023-03-02 23:41 ` cifs test patch to make cifs " David Howells
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=522331.1677800209@warthog.procyon.org.uk \
--to=dhowells@redhat.com \
--cc=jlayton@kernel.org \
--cc=linux-afs@lists.infradead.org \
--cc=linux-cifs@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=marc.dionne@auristor.com \
--cc=metze@samba.org \
--cc=nspmangalore@gmail.com \
--cc=pc@cjr.nz \
--cc=rohiths.msft@gmail.com \
--cc=smfrench@gmail.com \
--cc=tom@talpey.com \
--cc=torvalds@linux-foundation.org \
--cc=vishal.moola@gmail.com \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.