From: Xiubo Li <xiubli@redhat.com>
To: "Matthew Wilcox (Oracle)" <willy@infradead.org>,
Ilya Dryomov <idryomov@gmail.com>
Cc: Jeff Layton <jlayton@kernel.org>,
ceph-devel@vger.kernel.org, David Howells <dhowells@redhat.com>,
linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH 01/15] ceph: Convert ceph_writepages_start() to use folios a little more
Date: Mon, 28 Aug 2023 09:18:38 +0800 [thread overview]
Message-ID: <467fef8f-8383-1385-dedc-b97ea7c56e47@redhat.com> (raw)
In-Reply-To: <20230825201225.348148-2-willy@infradead.org>
On 8/26/23 04:12, Matthew Wilcox (Oracle) wrote:
> After we iterate through the locked folios using filemap_get_folios_tag(),
> we currently convert back to a page (and then in some circumstances back
> to a folio again!). Just use a folio throughout and avoid various hidden
> calls to compound_head(). Ceph still uses a page array to interact with
> the OSD which should be cleaned up in a subsequent patch.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
> fs/ceph/addr.c | 100 ++++++++++++++++++++++++-------------------------
> 1 file changed, 49 insertions(+), 51 deletions(-)
>
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index f4863078f7fe..9a0a79833eb0 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -1018,7 +1018,7 @@ static int ceph_writepages_start(struct address_space *mapping,
> int num_ops = 0, op_idx;
> unsigned i, nr_folios, max_pages, locked_pages = 0;
> struct page **pages = NULL, **data_pages;
> - struct page *page;
> + struct folio *folio;
> pgoff_t strip_unit_end = 0;
> u64 offset = 0, len = 0;
> bool from_pool = false;
> @@ -1032,22 +1032,22 @@ static int ceph_writepages_start(struct address_space *mapping,
> if (!nr_folios && !locked_pages)
> break;
> for (i = 0; i < nr_folios && locked_pages < max_pages; i++) {
> - page = &fbatch.folios[i]->page;
> - dout("? %p idx %lu\n", page, page->index);
> + folio = fbatch.folios[i];
> + dout("? %p idx %lu\n", folio, folio->index);
> if (locked_pages == 0)
> - lock_page(page); /* first page */
> - else if (!trylock_page(page))
> + folio_lock(folio); /* first folio */
> + else if (!folio_trylock(folio))
> break;
>
> /* only dirty pages, or our accounting breaks */
> - if (unlikely(!PageDirty(page)) ||
> - unlikely(page->mapping != mapping)) {
> - dout("!dirty or !mapping %p\n", page);
> - unlock_page(page);
> + if (unlikely(!folio_test_dirty(folio)) ||
> + unlikely(folio->mapping != mapping)) {
> + dout("!dirty or !mapping %p\n", folio);
> + folio_unlock(folio);
> continue;
> }
> /* only if matching snap context */
> - pgsnapc = page_snap_context(page);
> + pgsnapc = folio->private;
> if (pgsnapc != snapc) {
> dout("page snapc %p %lld != oldest %p %lld\n",
> pgsnapc, pgsnapc->seq, snapc, snapc->seq);
> @@ -1055,12 +1055,10 @@ static int ceph_writepages_start(struct address_space *mapping,
> !ceph_wbc.head_snapc &&
> wbc->sync_mode != WB_SYNC_NONE)
> should_loop = true;
> - unlock_page(page);
> + folio_unlock(folio);
> continue;
> }
> - if (page_offset(page) >= ceph_wbc.i_size) {
> - struct folio *folio = page_folio(page);
> -
> + if (folio_pos(folio) >= ceph_wbc.i_size) {
> dout("folio at %lu beyond eof %llu\n",
> folio->index, ceph_wbc.i_size);
> if ((ceph_wbc.size_stable ||
> @@ -1071,31 +1069,32 @@ static int ceph_writepages_start(struct address_space *mapping,
> folio_unlock(folio);
> continue;
> }
> - if (strip_unit_end && (page->index > strip_unit_end)) {
> - dout("end of strip unit %p\n", page);
> - unlock_page(page);
> + if (strip_unit_end && (folio->index > strip_unit_end)) {
> + dout("end of strip unit %p\n", folio);
> + folio_unlock(folio);
> break;
> }
> - if (PageWriteback(page) || PageFsCache(page)) {
> + if (folio_test_writeback(folio) ||
> + folio_test_fscache(folio)) {
> if (wbc->sync_mode == WB_SYNC_NONE) {
> - dout("%p under writeback\n", page);
> - unlock_page(page);
> + dout("%p under writeback\n", folio);
> + folio_unlock(folio);
> continue;
> }
> - dout("waiting on writeback %p\n", page);
> - wait_on_page_writeback(page);
> - wait_on_page_fscache(page);
> + dout("waiting on writeback %p\n", folio);
> + folio_wait_writeback(folio);
> + folio_wait_fscache(folio);
> }
>
> - if (!clear_page_dirty_for_io(page)) {
> - dout("%p !clear_page_dirty_for_io\n", page);
> - unlock_page(page);
> + if (!folio_clear_dirty_for_io(folio)) {
> + dout("%p !folio_clear_dirty_for_io\n", folio);
> + folio_unlock(folio);
> continue;
> }
>
> /*
> * We have something to write. If this is
> - * the first locked page this time through,
> + * the first locked folio this time through,
> * calculate max possinle write size and
> * allocate a page array
> */
> @@ -1105,7 +1104,7 @@ static int ceph_writepages_start(struct address_space *mapping,
> u32 xlen;
>
> /* prepare async write request */
> - offset = (u64)page_offset(page);
> + offset = folio_pos(folio);
> ceph_calc_file_object_mapping(&ci->i_layout,
> offset, wsize,
> &objnum, &objoff,
> @@ -1113,7 +1112,7 @@ static int ceph_writepages_start(struct address_space *mapping,
> len = xlen;
>
> num_ops = 1;
> - strip_unit_end = page->index +
> + strip_unit_end = folio->index +
> ((len - 1) >> PAGE_SHIFT);
>
> BUG_ON(pages);
> @@ -1128,23 +1127,23 @@ static int ceph_writepages_start(struct address_space *mapping,
> }
>
> len = 0;
> - } else if (page->index !=
> + } else if (folio->index !=
> (offset + len) >> PAGE_SHIFT) {
> if (num_ops >= (from_pool ? CEPH_OSD_SLAB_OPS :
> CEPH_OSD_MAX_OPS)) {
> - redirty_page_for_writepage(wbc, page);
> - unlock_page(page);
> + folio_redirty_for_writepage(wbc, folio);
> + folio_unlock(folio);
> break;
> }
>
> num_ops++;
> - offset = (u64)page_offset(page);
> + offset = (u64)folio_pos(folio);
> len = 0;
> }
>
> /* note position of first page in fbatch */
> - dout("%p will write page %p idx %lu\n",
> - inode, page, page->index);
> + dout("%p will write folio %p idx %lu\n",
> + inode, folio, folio->index);
>
> if (atomic_long_inc_return(&fsc->writeback_count) >
> CONGESTION_ON_THRESH(
> @@ -1153,7 +1152,7 @@ static int ceph_writepages_start(struct address_space *mapping,
>
> if (IS_ENCRYPTED(inode)) {
> pages[locked_pages] =
> - fscrypt_encrypt_pagecache_blocks(page,
> + fscrypt_encrypt_pagecache_blocks(&folio->page,
> PAGE_SIZE, 0,
> locked_pages ? GFP_NOWAIT : GFP_NOFS);
> if (IS_ERR(pages[locked_pages])) {
> @@ -1163,17 +1162,17 @@ static int ceph_writepages_start(struct address_space *mapping,
> /* better not fail on first page! */
> BUG_ON(locked_pages == 0);
> pages[locked_pages] = NULL;
> - redirty_page_for_writepage(wbc, page);
> - unlock_page(page);
> + folio_redirty_for_writepage(wbc, folio);
> + folio_unlock(folio);
> break;
> }
> ++locked_pages;
> } else {
> - pages[locked_pages++] = page;
> + pages[locked_pages++] = &folio->page;
> }
>
> fbatch.folios[i] = NULL;
> - len += thp_size(page);
> + len += folio_size(folio);
> }
>
> /* did we get anything? */
> @@ -1222,7 +1221,7 @@ static int ceph_writepages_start(struct address_space *mapping,
> BUG_ON(IS_ERR(req));
> }
> BUG_ON(len < ceph_fscrypt_page_offset(pages[locked_pages - 1]) +
> - thp_size(pages[locked_pages - 1]) - offset);
> + folio_size(folio) - offset);
>
> if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
> rc = -EIO;
> @@ -1236,9 +1235,9 @@ static int ceph_writepages_start(struct address_space *mapping,
> data_pages = pages;
> op_idx = 0;
> for (i = 0; i < locked_pages; i++) {
> - struct page *page = ceph_fscrypt_pagecache_page(pages[i]);
> + struct folio *folio = page_folio(ceph_fscrypt_pagecache_page(pages[i]));
>
> - u64 cur_offset = page_offset(page);
> + u64 cur_offset = folio_pos(folio);
> /*
> * Discontinuity in page range? Ceph can handle that by just passing
> * multiple extents in the write op.
> @@ -1267,10 +1266,10 @@ static int ceph_writepages_start(struct address_space *mapping,
> op_idx++;
> }
>
> - set_page_writeback(page);
> + folio_start_writeback(folio);
> if (caching)
> - ceph_set_page_fscache(page);
> - len += thp_size(page);
> + ceph_set_page_fscache(pages[i]);
> + len += folio_size(folio);
> }
> ceph_fscache_write_to_cache(inode, offset, len, caching);
>
> @@ -1280,7 +1279,7 @@ static int ceph_writepages_start(struct address_space *mapping,
> /* writepages_finish() clears writeback pages
> * according to the data length, so make sure
> * data length covers all locked pages */
> - u64 min_len = len + 1 - thp_size(page);
> + u64 min_len = len + 1 - folio_size(folio);
> len = get_writepages_data_length(inode, pages[i - 1],
> offset);
> len = max(len, min_len);
> @@ -1360,7 +1359,6 @@ static int ceph_writepages_start(struct address_space *mapping,
> if (wbc->sync_mode != WB_SYNC_NONE &&
> start_index == 0 && /* all dirty pages were checked */
> !ceph_wbc.head_snapc) {
> - struct page *page;
> unsigned i, nr;
> index = 0;
> while ((index <= end) &&
> @@ -1369,10 +1367,10 @@ static int ceph_writepages_start(struct address_space *mapping,
> PAGECACHE_TAG_WRITEBACK,
> &fbatch))) {
> for (i = 0; i < nr; i++) {
> - page = &fbatch.folios[i]->page;
> - if (page_snap_context(page) != snapc)
> + struct folio *folio = fbatch.folios[i];
> + if (folio->private != snapc)
Here IMO we should rename 'page_snap_context()' to 'folio_snap_context()'
and reuse it, instead of reading 'folio->private' directly. As I remember,
if the dirty bit is not set, `page->private` could still be non-NULL in
some cases?
Thanks
- Xiubo
> continue;
> - wait_on_page_writeback(page);
> + folio_wait_writeback(folio);
> }
> folio_batch_release(&fbatch);
> cond_resched();
next prev parent reply other threads:[~2023-08-28 1:21 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-25 20:12 [PATCH 00/15] Many folio conversions for ceph Matthew Wilcox (Oracle)
2023-08-25 20:12 ` [PATCH 01/15] ceph: Convert ceph_writepages_start() to use folios a little more Matthew Wilcox (Oracle)
2023-08-28 1:18 ` Xiubo Li [this message]
2023-11-20 0:30 ` Xiubo Li
2023-08-25 20:12 ` [PATCH 02/15] ceph: Convert ceph_page_mkwrite() to use a folio Matthew Wilcox (Oracle)
2023-08-28 1:21 ` Xiubo Li
2023-08-25 20:12 ` [PATCH 03/15] mm: Delete page_mkwrite_check_truncate() Matthew Wilcox (Oracle)
2023-08-25 20:12 ` [PATCH 04/15] ceph: Add a migrate_folio method Matthew Wilcox (Oracle)
2023-08-25 20:12 ` [PATCH 05/15] ceph: Remove ceph_writepage() Matthew Wilcox (Oracle)
2023-08-25 20:12 ` [PATCH 06/15] ceph: Convert ceph_find_incompatible() to take a folio Matthew Wilcox (Oracle)
2023-08-25 20:12 ` [PATCH 07/15] ceph: Convert writepage_nounlock() " Matthew Wilcox (Oracle)
2023-08-25 20:12 ` [PATCH 08/15] ceph: Convert writepages_finish() to use " Matthew Wilcox (Oracle)
2023-08-25 20:12 ` [PATCH 09/15] ceph: Use a folio in ceph_filemap_fault() Matthew Wilcox (Oracle)
2023-08-26 3:00 ` Matthew Wilcox
2023-08-28 1:19 ` Xiubo Li
2023-08-29 11:55 ` Jeff Layton
2023-08-29 13:30 ` Matthew Wilcox
2023-08-30 10:44 ` Ilya Dryomov
2023-08-31 3:52 ` Xiubo Li
2023-08-25 20:12 ` [PATCH 10/15] ceph: Convert ceph_read_iter() to use a folio to read inline data Matthew Wilcox (Oracle)
2023-08-25 20:12 ` [PATCH 11/15] ceph: Convert __ceph_do_getattr() to take a folio Matthew Wilcox (Oracle)
2023-08-25 20:12 ` [PATCH 12/15] ceph: Convert ceph_fill_inode() " Matthew Wilcox (Oracle)
2023-08-25 20:12 ` [PATCH 13/15] ceph: Convert ceph_fill_inline_data() " Matthew Wilcox (Oracle)
2023-08-25 20:12 ` [PATCH 14/15] ceph: Convert ceph_set_page_fscache() to ceph_folio_start_fscache() Matthew Wilcox (Oracle)
2023-08-25 20:12 ` [PATCH 15/15] netfs: Remove unused functions Matthew Wilcox (Oracle)
2023-08-26 1:30 ` [PATCH] fscache: Convert __fscache_clear_page_bits() to use a folio Matthew Wilcox (Oracle)
2023-11-17 15:37 ` [PATCH 00/15] Many folio conversions for ceph Matthew Wilcox
2023-11-20 0:32 ` Xiubo Li
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=467fef8f-8383-1385-dedc-b97ea7c56e47@redhat.com \
--to=xiubli@redhat.com \
--cc=ceph-devel@vger.kernel.org \
--cc=dhowells@redhat.com \
--cc=idryomov@gmail.com \
--cc=jlayton@kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).