* [PATCH v2 1/7] ceph: Do not look at the index of an encrypted page
2025-02-14 15:57 [PATCH v2 0/7] Remove accesses to page->index from ceph Matthew Wilcox (Oracle)
@ 2025-02-14 15:57 ` Matthew Wilcox (Oracle)
2025-02-14 15:57 ` [PATCH v2 2/7] ceph: Remove ceph_writepage() Matthew Wilcox (Oracle)
` (5 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Matthew Wilcox (Oracle) @ 2025-02-14 15:57 UTC (permalink / raw)
To: Ilya Dryomov
Cc: Matthew Wilcox (Oracle), ceph-devel, linux-fsdevel, David Howells,
stable, Jeff Layton
If the pages array contains encrypted pages, we cannot look at
page->index because that field is uninitialised. Instead, use the new
ceph_fscrypt_pagecache_folio() to get the pagecache folio and look at
the index of that.
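As a rough usage sketch (not part of the diff below; the function name is made up for illustration), a caller that wants the pagecache index of a possibly-bounced page would go through the new helper rather than the page it was handed:

/*
 * Hypothetical illustration: a bounce page's ->index is uninitialised,
 * so resolve it back to the pagecache folio before reading the index.
 */
static pgoff_t example_pagecache_index(struct page *page)
{
        struct folio *folio = ceph_fscrypt_pagecache_folio(page);

        return folio->index;
}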
Fixes: d55207717ded ("ceph: add encryption support to writepage and writepages")
Cc: stable@vger.kernel.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
---
fs/ceph/addr.c | 5 ++++-
fs/ceph/crypto.h | 7 +++++++
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index f5224a566b69..80bc0cbacd7a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1356,8 +1356,11 @@ static int ceph_writepages_start(struct address_space *mapping,
memset(data_pages + i, 0,
locked_pages * sizeof(*pages));
} else {
+ struct folio *folio;
+
BUG_ON(num_ops != req->r_num_ops);
- index = pages[i - 1]->index + 1;
+ folio = ceph_fscrypt_pagecache_folio(pages[i - 1]);
+ index = folio->index + 1;
/* request message now owns the pages array */
pages = NULL;
}
diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h
index d0768239a1c9..e4404ef589a1 100644
--- a/fs/ceph/crypto.h
+++ b/fs/ceph/crypto.h
@@ -280,6 +280,13 @@ static inline struct page *ceph_fscrypt_pagecache_page(struct page *page)
}
#endif /* CONFIG_FS_ENCRYPTION */
+static inline struct folio *ceph_fscrypt_pagecache_folio(struct page *page)
+{
+ if (fscrypt_is_bounce_page(page))
+ page = fscrypt_pagecache_page(page);
+ return page_folio(page);
+}
+
static inline loff_t ceph_fscrypt_page_offset(struct page *page)
{
return page_offset(ceph_fscrypt_pagecache_page(page));
--
2.47.2
* [PATCH v2 2/7] ceph: Remove ceph_writepage()
2025-02-14 15:57 [PATCH v2 0/7] Remove accesses to page->index from ceph Matthew Wilcox (Oracle)
2025-02-14 15:57 ` [PATCH v2 1/7] ceph: Do not look at the index of an encrypted page Matthew Wilcox (Oracle)
@ 2025-02-14 15:57 ` Matthew Wilcox (Oracle)
2025-02-14 15:57 ` [PATCH v2 3/7] ceph: Use a folio in ceph_page_mkwrite() Matthew Wilcox (Oracle)
` (4 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Matthew Wilcox (Oracle) @ 2025-02-14 15:57 UTC (permalink / raw)
To: Ilya Dryomov
Cc: Matthew Wilcox (Oracle), ceph-devel, linux-fsdevel, David Howells
Ceph already has a writepages operation which is preferred over writepage
in all situations except for page migration. By adding a migrate_folio
operation, there will be no situations in which ->writepage should
be called. filemap_migrate_folio() is an appropriate operation to use
because the ceph data stored in folio->private does not contain any
reference to the memory address of the folio.
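As an illustrative sketch (the aops name below is invented and this fragment is not taken from the diff), wiring the callback up is all that is needed, since filemap_migrate_folio() simply moves folio->private over to the destination folio during migration:

#include <linux/fs.h>
#include <linux/migrate.h>

/*
 * Hypothetical fragment: with ->migrate_folio set, page migration no
 * longer needs to fall back to ->writepage for this mapping.
 */
static const struct address_space_operations example_aops = {
        .migrate_folio  = filemap_migrate_folio,
};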
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/ceph/addr.c | 28 +---------------------------
1 file changed, 1 insertion(+), 27 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 80bc0cbacd7a..9b972251881a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -820,32 +820,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
return err;
}
-static int ceph_writepage(struct page *page, struct writeback_control *wbc)
-{
- int err;
- struct inode *inode = page->mapping->host;
- BUG_ON(!inode);
- ihold(inode);
-
- if (wbc->sync_mode == WB_SYNC_NONE &&
- ceph_inode_to_fs_client(inode)->write_congested) {
- redirty_page_for_writepage(wbc, page);
- return AOP_WRITEPAGE_ACTIVATE;
- }
-
- folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */
-
- err = writepage_nounlock(page, wbc);
- if (err == -ERESTARTSYS) {
- /* direct memory reclaimer was killed by SIGKILL. return 0
- * to prevent caller from setting mapping/page error */
- err = 0;
- }
- unlock_page(page);
- iput(inode);
- return err;
-}
-
/*
* async writeback completion handler.
*
@@ -1597,7 +1571,6 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
const struct address_space_operations ceph_aops = {
.read_folio = netfs_read_folio,
.readahead = netfs_readahead,
- .writepage = ceph_writepage,
.writepages = ceph_writepages_start,
.write_begin = ceph_write_begin,
.write_end = ceph_write_end,
@@ -1605,6 +1578,7 @@ const struct address_space_operations ceph_aops = {
.invalidate_folio = ceph_invalidate_folio,
.release_folio = netfs_release_folio,
.direct_IO = noop_direct_IO,
+ .migrate_folio = filemap_migrate_folio,
};
static void ceph_block_sigs(sigset_t *oldset)
--
2.47.2
* [PATCH v2 3/7] ceph: Use a folio in ceph_page_mkwrite()
2025-02-14 15:57 [PATCH v2 0/7] Remove accesses to page->index from ceph Matthew Wilcox (Oracle)
2025-02-14 15:57 ` [PATCH v2 1/7] ceph: Do not look at the index of an encrypted page Matthew Wilcox (Oracle)
2025-02-14 15:57 ` [PATCH v2 2/7] ceph: Remove ceph_writepage() Matthew Wilcox (Oracle)
@ 2025-02-14 15:57 ` Matthew Wilcox (Oracle)
2025-02-14 19:05 ` Viacheslav Dubeyko
2025-02-14 15:57 ` [PATCH v2 4/7] ceph: Convert ceph_find_incompatible() to take a folio Matthew Wilcox (Oracle)
` (3 subsequent siblings)
6 siblings, 1 reply; 12+ messages in thread
From: Matthew Wilcox (Oracle) @ 2025-02-14 15:57 UTC (permalink / raw)
To: Ilya Dryomov
Cc: Matthew Wilcox (Oracle), ceph-devel, linux-fsdevel, David Howells
Convert the passed page to a folio and use it
throughout ceph_page_mkwrite(). Removes the last call to
page_mkwrite_check_truncate(), the last call to offset_in_thp() and one
of the last calls to thp_size(). Saves a few calls to compound_head().
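For illustration (a distilled sketch, not lifted from the diff; the function name is hypothetical), the conversion is a one-to-one mapping from the old page/THP helpers onto their folio equivalents:

/* Hypothetical sketch of the length calculation after the conversion. */
static size_t example_mkwrite_len(struct folio *folio, loff_t i_size)
{
        loff_t off = folio_pos(folio);                  /* was page_offset(page) */

        if (off + folio_size(folio) <= i_size)          /* was thp_size(page) */
                return folio_size(folio);
        return offset_in_folio(folio, i_size);          /* was offset_in_thp(page, size) */
}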
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/ceph/addr.c | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 9b972251881a..b659100f290a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1695,8 +1695,8 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
struct ceph_cap_flush *prealloc_cf;
- struct page *page = vmf->page;
- loff_t off = page_offset(page);
+ struct folio *folio = page_folio(vmf->page);
+ loff_t off = folio_pos(folio);
loff_t size = i_size_read(inode);
size_t len;
int want, got, err;
@@ -1713,10 +1713,10 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
sb_start_pagefault(inode->i_sb);
ceph_block_sigs(&oldset);
- if (off + thp_size(page) <= size)
- len = thp_size(page);
+ if (off + folio_size(folio) <= size)
+ len = folio_size(folio);
else
- len = offset_in_thp(page, size);
+ len = offset_in_folio(folio, size);
doutc(cl, "%llx.%llx %llu~%zd getting caps i_size %llu\n",
ceph_vinop(inode), off, len, size);
@@ -1733,30 +1733,30 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
doutc(cl, "%llx.%llx %llu~%zd got cap refs on %s\n", ceph_vinop(inode),
off, len, ceph_cap_string(got));
- /* Update time before taking page lock */
+ /* Update time before taking folio lock */
file_update_time(vma->vm_file);
inode_inc_iversion_raw(inode);
do {
struct ceph_snap_context *snapc;
- lock_page(page);
+ folio_lock(folio);
- if (page_mkwrite_check_truncate(page, inode) < 0) {
- unlock_page(page);
+ if (folio_mkwrite_check_truncate(folio, inode) < 0) {
+ folio_unlock(folio);
ret = VM_FAULT_NOPAGE;
break;
}
- snapc = ceph_find_incompatible(page);
+ snapc = ceph_find_incompatible(&folio->page);
if (!snapc) {
- /* success. we'll keep the page locked. */
- set_page_dirty(page);
+ /* success. we'll keep the folio locked. */
+ folio_mark_dirty(folio);
ret = VM_FAULT_LOCKED;
break;
}
- unlock_page(page);
+ folio_unlock(folio);
if (IS_ERR(snapc)) {
ret = VM_FAULT_SIGBUS;
--
2.47.2
* Re: [PATCH v2 3/7] ceph: Use a folio in ceph_page_mkwrite()
2025-02-14 15:57 ` [PATCH v2 3/7] ceph: Use a folio in ceph_page_mkwrite() Matthew Wilcox (Oracle)
@ 2025-02-14 19:05 ` Viacheslav Dubeyko
0 siblings, 0 replies; 12+ messages in thread
From: Viacheslav Dubeyko @ 2025-02-14 19:05 UTC (permalink / raw)
To: idryomov@gmail.com, willy@infradead.org
Cc: ceph-devel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
David Howells
On Fri, 2025-02-14 at 15:57 +0000, Matthew Wilcox (Oracle) wrote:
> Convert the passed page to a folio and use it
> throughout ceph_page_mkwrite(). Removes the last call to
> page_mkwrite_check_truncate(), the last call to offset_in_thp() and one
> of the last calls to thp_size(). Saves a few calls to compound_head().
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
> fs/ceph/addr.c | 26 +++++++++++++-------------
> 1 file changed, 13 insertions(+), 13 deletions(-)
>
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 9b972251881a..b659100f290a 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -1695,8 +1695,8 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
> struct ceph_inode_info *ci = ceph_inode(inode);
> struct ceph_file_info *fi = vma->vm_file->private_data;
> struct ceph_cap_flush *prealloc_cf;
> - struct page *page = vmf->page;
> - loff_t off = page_offset(page);
> + struct folio *folio = page_folio(vmf->page);
> + loff_t off = folio_pos(folio);
> loff_t size = i_size_read(inode);
> size_t len;
> int want, got, err;
> @@ -1713,10 +1713,10 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
> sb_start_pagefault(inode->i_sb);
> ceph_block_sigs(&oldset);
>
> - if (off + thp_size(page) <= size)
> - len = thp_size(page);
> + if (off + folio_size(folio) <= size)
> + len = folio_size(folio);
> else
> - len = offset_in_thp(page, size);
> + len = offset_in_folio(folio, size);
>
> doutc(cl, "%llx.%llx %llu~%zd getting caps i_size %llu\n",
> ceph_vinop(inode), off, len, size);
> @@ -1733,30 +1733,30 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
> doutc(cl, "%llx.%llx %llu~%zd got cap refs on %s\n", ceph_vinop(inode),
> off, len, ceph_cap_string(got));
>
> - /* Update time before taking page lock */
> + /* Update time before taking folio lock */
> file_update_time(vma->vm_file);
> inode_inc_iversion_raw(inode);
>
> do {
> struct ceph_snap_context *snapc;
>
> - lock_page(page);
> + folio_lock(folio);
>
> - if (page_mkwrite_check_truncate(page, inode) < 0) {
> - unlock_page(page);
> + if (folio_mkwrite_check_truncate(folio, inode) < 0) {
> + folio_unlock(folio);
> ret = VM_FAULT_NOPAGE;
> break;
> }
>
> - snapc = ceph_find_incompatible(page);
> + snapc = ceph_find_incompatible(&folio->page);
> if (!snapc) {
> - /* success. we'll keep the page locked. */
> - set_page_dirty(page);
> + /* success. we'll keep the folio locked. */
> + folio_mark_dirty(folio);
> ret = VM_FAULT_LOCKED;
> break;
> }
>
> - unlock_page(page);
> + folio_unlock(folio);
>
> if (IS_ERR(snapc)) {
> ret = VM_FAULT_SIGBUS;
Looks good. Pretty obvious modification.
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Thanks,
Slava.
* [PATCH v2 4/7] ceph: Convert ceph_find_incompatible() to take a folio
2025-02-14 15:57 [PATCH v2 0/7] Remove accesses to page->index from ceph Matthew Wilcox (Oracle)
` (2 preceding siblings ...)
2025-02-14 15:57 ` [PATCH v2 3/7] ceph: Use a folio in ceph_page_mkwrite() Matthew Wilcox (Oracle)
@ 2025-02-14 15:57 ` Matthew Wilcox (Oracle)
2025-02-14 15:57 ` [PATCH v2 5/7] ceph: Convert ceph_readdir_cache_control to store " Matthew Wilcox (Oracle)
` (2 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Matthew Wilcox (Oracle) @ 2025-02-14 15:57 UTC (permalink / raw)
To: Ilya Dryomov
Cc: Matthew Wilcox (Oracle), ceph-devel, linux-fsdevel, David Howells
Both callers already have the folio. Pass it in and use it throughout.
Removes some hidden calls to compound_head() and a reference to
page->mapping.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/ceph/addr.c | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b659100f290a..7a2aa81b20eb 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1424,56 +1424,56 @@ static int context_is_writeable_or_written(struct inode *inode,
/**
* ceph_find_incompatible - find an incompatible context and return it
- * @page: page being dirtied
+ * @folio: folio being dirtied
*
- * We are only allowed to write into/dirty a page if the page is
+ * We are only allowed to write into/dirty a folio if the folio is
* clean, or already dirty within the same snap context. Returns a
* conflicting context if there is one, NULL if there isn't, or a
* negative error code on other errors.
*
- * Must be called with page lock held.
+ * Must be called with folio lock held.
*/
static struct ceph_snap_context *
-ceph_find_incompatible(struct page *page)
+ceph_find_incompatible(struct folio *folio)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct ceph_client *cl = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
if (ceph_inode_is_shutdown(inode)) {
- doutc(cl, " %llx.%llx page %p is shutdown\n",
- ceph_vinop(inode), page);
+ doutc(cl, " %llx.%llx folio %p is shutdown\n",
+ ceph_vinop(inode), folio);
return ERR_PTR(-ESTALE);
}
for (;;) {
struct ceph_snap_context *snapc, *oldest;
- wait_on_page_writeback(page);
+ folio_wait_writeback(folio);
- snapc = page_snap_context(page);
+ snapc = page_snap_context(&folio->page);
if (!snapc || snapc == ci->i_head_snapc)
break;
/*
- * this page is already dirty in another (older) snap
+ * this folio is already dirty in another (older) snap
* context! is it writeable now?
*/
oldest = get_oldest_context(inode, NULL, NULL);
if (snapc->seq > oldest->seq) {
/* not writeable -- return it for the caller to deal with */
ceph_put_snap_context(oldest);
- doutc(cl, " %llx.%llx page %p snapc %p not current or oldest\n",
- ceph_vinop(inode), page, snapc);
+ doutc(cl, " %llx.%llx folio %p snapc %p not current or oldest\n",
+ ceph_vinop(inode), folio, snapc);
return ceph_get_snap_context(snapc);
}
ceph_put_snap_context(oldest);
- /* yay, writeable, do it now (without dropping page lock) */
- doutc(cl, " %llx.%llx page %p snapc %p not current, but oldest\n",
- ceph_vinop(inode), page, snapc);
- if (clear_page_dirty_for_io(page)) {
- int r = writepage_nounlock(page, NULL);
+ /* yay, writeable, do it now (without dropping folio lock) */
+ doutc(cl, " %llx.%llx folio %p snapc %p not current, but oldest\n",
+ ceph_vinop(inode), folio, snapc);
+ if (folio_clear_dirty_for_io(folio)) {
+ int r = writepage_nounlock(&folio->page, NULL);
if (r < 0)
return ERR_PTR(r);
}
@@ -1488,7 +1488,7 @@ static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_snap_context *snapc;
- snapc = ceph_find_incompatible(folio_page(*foliop, 0));
+ snapc = ceph_find_incompatible(*foliop);
if (snapc) {
int r;
@@ -1748,7 +1748,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
break;
}
- snapc = ceph_find_incompatible(&folio->page);
+ snapc = ceph_find_incompatible(folio);
if (!snapc) {
/* success. we'll keep the folio locked. */
folio_mark_dirty(folio);
--
2.47.2
* [PATCH v2 5/7] ceph: Convert ceph_readdir_cache_control to store a folio
2025-02-14 15:57 [PATCH v2 0/7] Remove accesses to page->index from ceph Matthew Wilcox (Oracle)
` (3 preceding siblings ...)
2025-02-14 15:57 ` [PATCH v2 4/7] ceph: Convert ceph_find_incompatible() to take a folio Matthew Wilcox (Oracle)
@ 2025-02-14 15:57 ` Matthew Wilcox (Oracle)
2025-02-14 19:10 ` Viacheslav Dubeyko
2025-02-14 15:57 ` [PATCH v2 6/7] ceph: Convert writepage_nounlock() to write_folio_nounlock() Matthew Wilcox (Oracle)
2025-02-14 15:57 ` [PATCH v2 7/7] ceph: Use a folio in ceph_writepages_start() Matthew Wilcox (Oracle)
6 siblings, 1 reply; 12+ messages in thread
From: Matthew Wilcox (Oracle) @ 2025-02-14 15:57 UTC (permalink / raw)
To: Ilya Dryomov
Cc: Matthew Wilcox (Oracle), ceph-devel, linux-fsdevel, David Howells
Pass a folio around instead of a page. This removes an access to
page->index and a few hidden calls to compound_head().
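One judgement call in the conversion shows up in fill_readdir_cache(): grab_cache_page() and find_lock_page() collapse into a single __filemap_get_folio() call whose fgf flags select the behaviour. A rough sketch (hypothetical helper name, not from the diff):

/*
 * Illustration only: FGP_LOCK looks the folio up and locks it, like
 * find_lock_page(); adding FGP_CREAT | FGP_ACCESSED also allocates it
 * when absent, like grab_cache_page().
 */
static struct folio *example_get_dir_folio(struct address_space *mapping,
                                           pgoff_t index, bool create)
{
        fgf_t fgf = FGP_LOCK;

        if (create)
                fgf |= FGP_CREAT | FGP_ACCESSED;

        return __filemap_get_folio(mapping, index, fgf,
                                   mapping_gfp_mask(mapping));
}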
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/ceph/dir.c | 13 +++++++------
fs/ceph/inode.c | 26 ++++++++++++++------------
fs/ceph/super.h | 2 +-
3 files changed, 22 insertions(+), 19 deletions(-)
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 62e99e65250d..66f00604c86b 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -141,17 +141,18 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx,
if (ptr_pos >= i_size_read(dir))
return NULL;
- if (!cache_ctl->page || ptr_pgoff != cache_ctl->page->index) {
+ if (!cache_ctl->folio || ptr_pgoff != cache_ctl->folio->index) {
ceph_readdir_cache_release(cache_ctl);
- cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff);
- if (!cache_ctl->page) {
+ cache_ctl->folio = filemap_lock_folio(&dir->i_data, ptr_pgoff);
+ if (IS_ERR(cache_ctl->folio)) {
+ cache_ctl->folio = NULL;
doutc(cl, " page %lu not found\n", ptr_pgoff);
return ERR_PTR(-EAGAIN);
}
/* reading/filling the cache are serialized by
- i_rwsem, no need to use page lock */
- unlock_page(cache_ctl->page);
- cache_ctl->dentries = kmap(cache_ctl->page);
+ i_rwsem, no need to use folio lock */
+ folio_unlock(cache_ctl->folio);
+ cache_ctl->dentries = kmap_local_folio(cache_ctl->folio, 0);
}
cache_ctl->index = idx & idx_mask;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 7dd6c2275085..c15970fa240f 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1845,10 +1845,9 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl)
{
- if (ctl->page) {
- kunmap(ctl->page);
- put_page(ctl->page);
- ctl->page = NULL;
+ if (ctl->folio) {
+ folio_release_kmap(ctl->folio, ctl->dentries);
+ ctl->folio = NULL;
}
}
@@ -1862,20 +1861,23 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn,
unsigned idx = ctl->index % nsize;
pgoff_t pgoff = ctl->index / nsize;
- if (!ctl->page || pgoff != ctl->page->index) {
+ if (!ctl->folio || pgoff != ctl->folio->index) {
ceph_readdir_cache_release(ctl);
+ fgf_t fgf = FGP_LOCK;
+
if (idx == 0)
- ctl->page = grab_cache_page(&dir->i_data, pgoff);
- else
- ctl->page = find_lock_page(&dir->i_data, pgoff);
- if (!ctl->page) {
+ fgf |= FGP_ACCESSED | FGP_CREAT;
+
+ ctl->folio = __filemap_get_folio(&dir->i_data, pgoff,
+ fgf, mapping_gfp_mask(&dir->i_data));
+ if (!ctl->folio) {
ctl->index = -1;
return idx == 0 ? -ENOMEM : 0;
}
/* reading/filling the cache are serialized by
- * i_rwsem, no need to use page lock */
- unlock_page(ctl->page);
- ctl->dentries = kmap(ctl->page);
+ * i_rwsem, no need to use folio lock */
+ folio_unlock(ctl->folio);
+ ctl->dentries = kmap_local_folio(ctl->folio, 0);
if (idx == 0)
memset(ctl->dentries, 0, PAGE_SIZE);
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 7fa1e7be50e4..bb0db0cc8003 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -903,7 +903,7 @@ ceph_find_rw_context(struct ceph_file_info *cf)
}
struct ceph_readdir_cache_control {
- struct page *page;
+ struct folio *folio;
struct dentry **dentries;
int index;
};
--
2.47.2
* Re: [PATCH v2 5/7] ceph: Convert ceph_readdir_cache_control to store a folio
2025-02-14 15:57 ` [PATCH v2 5/7] ceph: Convert ceph_readdir_cache_control to store " Matthew Wilcox (Oracle)
@ 2025-02-14 19:10 ` Viacheslav Dubeyko
2025-02-14 19:34 ` Matthew Wilcox
0 siblings, 1 reply; 12+ messages in thread
From: Viacheslav Dubeyko @ 2025-02-14 19:10 UTC (permalink / raw)
To: idryomov@gmail.com, willy@infradead.org
Cc: ceph-devel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
David Howells
On Fri, 2025-02-14 at 15:57 +0000, Matthew Wilcox (Oracle) wrote:
> Pass a folio around instead of a page. This removes an access to
> page->index and a few hidden calls to compound_head().
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
> fs/ceph/dir.c | 13 +++++++------
> fs/ceph/inode.c | 26 ++++++++++++++------------
> fs/ceph/super.h | 2 +-
> 3 files changed, 22 insertions(+), 19 deletions(-)
>
> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> index 62e99e65250d..66f00604c86b 100644
> --- a/fs/ceph/dir.c
> +++ b/fs/ceph/dir.c
> @@ -141,17 +141,18 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx,
> if (ptr_pos >= i_size_read(dir))
> return NULL;
>
> - if (!cache_ctl->page || ptr_pgoff != cache_ctl->page->index) {
> + if (!cache_ctl->folio || ptr_pgoff != cache_ctl->folio->index) {
> ceph_readdir_cache_release(cache_ctl);
> - cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff);
> - if (!cache_ctl->page) {
> + cache_ctl->folio = filemap_lock_folio(&dir->i_data, ptr_pgoff);
> + if (IS_ERR(cache_ctl->folio)) {
> + cache_ctl->folio = NULL;
> doutc(cl, " page %lu not found\n", ptr_pgoff);
Maybe, we need to change debug output here too?
doutc(cl, " folio %lu not found\n", ptr_pgoff);
Thanks,
Slava.
* Re: [PATCH v2 5/7] ceph: Convert ceph_readdir_cache_control to store a folio
2025-02-14 19:10 ` Viacheslav Dubeyko
@ 2025-02-14 19:34 ` Matthew Wilcox
2025-02-14 19:38 ` Viacheslav Dubeyko
0 siblings, 1 reply; 12+ messages in thread
From: Matthew Wilcox @ 2025-02-14 19:34 UTC (permalink / raw)
To: Viacheslav Dubeyko
Cc: idryomov@gmail.com, ceph-devel@vger.kernel.org,
linux-fsdevel@vger.kernel.org, David Howells
On Fri, Feb 14, 2025 at 07:10:26PM +0000, Viacheslav Dubeyko wrote:
> > - cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff);
> > - if (!cache_ctl->page) {
> > + cache_ctl->folio = filemap_lock_folio(&dir->i_data, ptr_pgoff);
> > + if (IS_ERR(cache_ctl->folio)) {
> > + cache_ctl->folio = NULL;
> > doutc(cl, " page %lu not found\n", ptr_pgoff);
>
> Maybe, we need to change debug output here too?
>
> doutc(cl, " folio %lu not found\n", ptr_pgoff);
I'm happy to make that change for the next version, or for somebody to
make that change while applying the patches.
* RE: [PATCH v2 5/7] ceph: Convert ceph_readdir_cache_control to store a folio
2025-02-14 19:34 ` Matthew Wilcox
@ 2025-02-14 19:38 ` Viacheslav Dubeyko
0 siblings, 0 replies; 12+ messages in thread
From: Viacheslav Dubeyko @ 2025-02-14 19:38 UTC (permalink / raw)
To: willy@infradead.org
Cc: idryomov@gmail.com, ceph-devel@vger.kernel.org,
linux-fsdevel@vger.kernel.org, David Howells
On Fri, 2025-02-14 at 19:34 +0000, Matthew Wilcox wrote:
> On Fri, Feb 14, 2025 at 07:10:26PM +0000, Viacheslav Dubeyko wrote:
> > > - cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff);
> > > - if (!cache_ctl->page) {
> > > + cache_ctl->folio = filemap_lock_folio(&dir->i_data, ptr_pgoff);
> > > + if (IS_ERR(cache_ctl->folio)) {
> > > + cache_ctl->folio = NULL;
> > > doutc(cl, " page %lu not found\n", ptr_pgoff);
> >
> > Maybe, we need to change debug output here too?
> >
> > doutc(cl, " folio %lu not found\n", ptr_pgoff);
>
> I'm happy to make that change for the next version, or for somebody to
> make that change while applying the patches.
It's not a critical one. The patch looks good.
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Thanks,
Slava.
* [PATCH v2 6/7] ceph: Convert writepage_nounlock() to write_folio_nounlock()
2025-02-14 15:57 [PATCH v2 0/7] Remove accesses to page->index from ceph Matthew Wilcox (Oracle)
` (4 preceding siblings ...)
2025-02-14 15:57 ` [PATCH v2 5/7] ceph: Convert ceph_readdir_cache_control to store " Matthew Wilcox (Oracle)
@ 2025-02-14 15:57 ` Matthew Wilcox (Oracle)
2025-02-14 15:57 ` [PATCH v2 7/7] ceph: Use a folio in ceph_writepages_start() Matthew Wilcox (Oracle)
6 siblings, 0 replies; 12+ messages in thread
From: Matthew Wilcox (Oracle) @ 2025-02-14 15:57 UTC (permalink / raw)
To: Ilya Dryomov
Cc: Matthew Wilcox (Oracle), ceph-devel, linux-fsdevel, David Howells
Remove references to page->index, page->mapping, thp_size(),
page_offset() and other page APIs in favour of their more efficient
folio replacements.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/ceph/addr.c | 67 +++++++++++++++++++++++++-------------------------
1 file changed, 34 insertions(+), 33 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 7a2aa81b20eb..822485db234e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -666,22 +666,23 @@ static u64 get_writepages_data_length(struct inode *inode,
}
/*
- * Write a single page, but leave the page locked.
+ * Write a folio, but leave it locked.
*
* If we get a write error, mark the mapping for error, but still adjust the
- * dirty page accounting (i.e., page is no longer dirty).
+ * dirty page accounting (i.e., folio is no longer dirty).
*/
-static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
+static int write_folio_nounlock(struct folio *folio,
+ struct writeback_control *wbc)
{
- struct folio *folio = page_folio(page);
- struct inode *inode = page->mapping->host;
+ struct page *page = &folio->page;
+ struct inode *inode = folio->mapping->host;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
struct ceph_client *cl = fsc->client;
struct ceph_snap_context *snapc, *oldest;
- loff_t page_off = page_offset(page);
+ loff_t page_off = folio_pos(folio);
int err;
- loff_t len = thp_size(page);
+ loff_t len = folio_size(folio);
loff_t wlen;
struct ceph_writeback_ctl ceph_wbc;
struct ceph_osd_client *osdc = &fsc->client->osdc;
@@ -689,27 +690,27 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
bool caching = ceph_is_cache_enabled(inode);
struct page *bounce_page = NULL;
- doutc(cl, "%llx.%llx page %p idx %lu\n", ceph_vinop(inode), page,
- page->index);
+ doutc(cl, "%llx.%llx folio %p idx %lu\n", ceph_vinop(inode), folio,
+ folio->index);
if (ceph_inode_is_shutdown(inode))
return -EIO;
/* verify this is a writeable snap context */
- snapc = page_snap_context(page);
+ snapc = page_snap_context(&folio->page);
if (!snapc) {
- doutc(cl, "%llx.%llx page %p not dirty?\n", ceph_vinop(inode),
- page);
+ doutc(cl, "%llx.%llx folio %p not dirty?\n", ceph_vinop(inode),
+ folio);
return 0;
}
oldest = get_oldest_context(inode, &ceph_wbc, snapc);
if (snapc->seq > oldest->seq) {
- doutc(cl, "%llx.%llx page %p snapc %p not writeable - noop\n",
- ceph_vinop(inode), page, snapc);
+ doutc(cl, "%llx.%llx folio %p snapc %p not writeable - noop\n",
+ ceph_vinop(inode), folio, snapc);
/* we should only noop if called by kswapd */
WARN_ON(!(current->flags & PF_MEMALLOC));
ceph_put_snap_context(oldest);
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
return 0;
}
ceph_put_snap_context(oldest);
@@ -726,8 +727,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
len = ceph_wbc.i_size - page_off;
wlen = IS_ENCRYPTED(inode) ? round_up(len, CEPH_FSCRYPT_BLOCK_SIZE) : len;
- doutc(cl, "%llx.%llx page %p index %lu on %llu~%llu snapc %p seq %lld\n",
- ceph_vinop(inode), page, page->index, page_off, wlen, snapc,
+ doutc(cl, "%llx.%llx folio %p index %lu on %llu~%llu snapc %p seq %lld\n",
+ ceph_vinop(inode), folio, folio->index, page_off, wlen, snapc,
snapc->seq);
if (atomic_long_inc_return(&fsc->writeback_count) >
@@ -740,32 +741,32 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
ceph_wbc.truncate_seq,
ceph_wbc.truncate_size, true);
if (IS_ERR(req)) {
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
return PTR_ERR(req);
}
if (wlen < len)
len = wlen;
- set_page_writeback(page);
+ folio_start_writeback(folio);
if (caching)
- ceph_set_page_fscache(page);
+ ceph_set_page_fscache(&folio->page);
ceph_fscache_write_to_cache(inode, page_off, len, caching);
if (IS_ENCRYPTED(inode)) {
- bounce_page = fscrypt_encrypt_pagecache_blocks(page,
+ bounce_page = fscrypt_encrypt_pagecache_blocks(&folio->page,
CEPH_FSCRYPT_BLOCK_SIZE, 0,
GFP_NOFS);
if (IS_ERR(bounce_page)) {
- redirty_page_for_writepage(wbc, page);
- end_page_writeback(page);
+ folio_redirty_for_writepage(wbc, folio);
+ folio_end_writeback(folio);
ceph_osdc_put_request(req);
return PTR_ERR(bounce_page);
}
}
/* it may be a short write due to an object boundary */
- WARN_ON_ONCE(len > thp_size(page));
+ WARN_ON_ONCE(len > folio_size(folio));
osd_req_op_extent_osd_data_pages(req, 0,
bounce_page ? &bounce_page : &page, wlen, 0,
false, false);
@@ -791,25 +792,25 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
if (err == -ERESTARTSYS) {
/* killed by SIGKILL */
doutc(cl, "%llx.%llx interrupted page %p\n",
- ceph_vinop(inode), page);
- redirty_page_for_writepage(wbc, page);
- end_page_writeback(page);
+ ceph_vinop(inode), folio);
+ folio_redirty_for_writepage(wbc, folio);
+ folio_end_writeback(folio);
return err;
}
if (err == -EBLOCKLISTED)
fsc->blocklisted = true;
- doutc(cl, "%llx.%llx setting page/mapping error %d %p\n",
- ceph_vinop(inode), err, page);
+ doutc(cl, "%llx.%llx setting mapping error %d %p\n",
+ ceph_vinop(inode), err, folio);
mapping_set_error(&inode->i_data, err);
wbc->pages_skipped++;
} else {
doutc(cl, "%llx.%llx cleaned page %p\n",
- ceph_vinop(inode), page);
+ ceph_vinop(inode), folio);
err = 0; /* vfs expects us to return 0 */
}
- oldest = detach_page_private(page);
+ oldest = folio_detach_private(folio);
WARN_ON_ONCE(oldest != snapc);
- end_page_writeback(page);
+ folio_end_writeback(folio);
ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
ceph_put_snap_context(snapc); /* page's reference */
@@ -1473,7 +1474,7 @@ ceph_find_incompatible(struct folio *folio)
doutc(cl, " %llx.%llx folio %p snapc %p not current, but oldest\n",
ceph_vinop(inode), folio, snapc);
if (folio_clear_dirty_for_io(folio)) {
- int r = writepage_nounlock(&folio->page, NULL);
+ int r = write_folio_nounlock(folio, NULL);
if (r < 0)
return ERR_PTR(r);
}
--
2.47.2
* [PATCH v2 7/7] ceph: Use a folio in ceph_writepages_start()
2025-02-14 15:57 [PATCH v2 0/7] Remove accesses to page->index from ceph Matthew Wilcox (Oracle)
` (5 preceding siblings ...)
2025-02-14 15:57 ` [PATCH v2 6/7] ceph: Convert writepage_nounlock() to write_folio_nounlock() Matthew Wilcox (Oracle)
@ 2025-02-14 15:57 ` Matthew Wilcox (Oracle)
6 siblings, 0 replies; 12+ messages in thread
From: Matthew Wilcox (Oracle) @ 2025-02-14 15:57 UTC (permalink / raw)
To: Ilya Dryomov
Cc: Matthew Wilcox (Oracle), ceph-devel, linux-fsdevel, David Howells
We currently convert the folio returned from filemap_get_folios_tag()
to a page and operate on that page. Remove this and operate on the
folio. Removes a lot of calls to obsolete functions and references
to page->index and page->mapping.
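A simplified sketch of the resulting loop shape (illustrative only; the helper name is made up, and the real function keeps each folio locked and queues it for the OSD write rather than unlocking it):

static void example_scan_dirty_folios(struct address_space *mapping,
                                      struct folio_batch *fbatch)
{
        unsigned int i;

        for (i = 0; i < folio_batch_count(fbatch); i++) {
                struct folio *folio = fbatch->folios[i];

                if (!folio_trylock(folio))
                        break;
                if (!folio_test_dirty(folio) || folio->mapping != mapping) {
                        folio_unlock(folio);
                        continue;
                }
                /* ...locked, dirty, still in this mapping: queue it... */
                folio_unlock(folio);
        }
}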
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/ceph/addr.c | 63 +++++++++++++++++++++++++-------------------------
1 file changed, 31 insertions(+), 32 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 822485db234e..a97a3eee426b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1025,7 +1025,7 @@ static int ceph_writepages_start(struct address_space *mapping,
int num_ops = 0, op_idx;
unsigned i, nr_folios, max_pages, locked_pages = 0;
struct page **pages = NULL, **data_pages;
- struct page *page;
+ struct folio *folio;
pgoff_t strip_unit_end = 0;
u64 offset = 0, len = 0;
bool from_pool = false;
@@ -1039,24 +1039,23 @@ static int ceph_writepages_start(struct address_space *mapping,
if (!nr_folios && !locked_pages)
break;
for (i = 0; i < nr_folios && locked_pages < max_pages; i++) {
- struct folio *folio = fbatch.folios[i];
+ folio = fbatch.folios[i];
- page = &folio->page;
- doutc(cl, "? %p idx %lu\n", page, page->index);
+ doutc(cl, "? %p idx %lu\n", folio, folio->index);
if (locked_pages == 0)
- lock_page(page); /* first page */
- else if (!trylock_page(page))
+ folio_lock(folio); /* first page */
+ else if (!folio_trylock(folio))
break;
/* only dirty pages, or our accounting breaks */
- if (unlikely(!PageDirty(page)) ||
- unlikely(page->mapping != mapping)) {
- doutc(cl, "!dirty or !mapping %p\n", page);
- unlock_page(page);
+ if (unlikely(!folio_test_dirty(folio)) ||
+ unlikely(folio->mapping != mapping)) {
+ doutc(cl, "!dirty or !mapping %p\n", folio);
+ folio_unlock(folio);
continue;
}
/* only if matching snap context */
- pgsnapc = page_snap_context(page);
+ pgsnapc = page_snap_context(&folio->page);
if (pgsnapc != snapc) {
doutc(cl, "page snapc %p %lld != oldest %p %lld\n",
pgsnapc, pgsnapc->seq, snapc, snapc->seq);
@@ -1064,10 +1063,10 @@ static int ceph_writepages_start(struct address_space *mapping,
!ceph_wbc.head_snapc &&
wbc->sync_mode != WB_SYNC_NONE)
should_loop = true;
- unlock_page(page);
+ folio_unlock(folio);
continue;
}
- if (page_offset(page) >= ceph_wbc.i_size) {
+ if (folio_pos(folio) >= ceph_wbc.i_size) {
doutc(cl, "folio at %lu beyond eof %llu\n",
folio->index, ceph_wbc.i_size);
if ((ceph_wbc.size_stable ||
@@ -1078,9 +1077,9 @@ static int ceph_writepages_start(struct address_space *mapping,
folio_unlock(folio);
continue;
}
- if (strip_unit_end && (page->index > strip_unit_end)) {
- doutc(cl, "end of strip unit %p\n", page);
- unlock_page(page);
+ if (strip_unit_end && (folio->index > strip_unit_end)) {
+ doutc(cl, "end of strip unit %p\n", folio);
+ folio_unlock(folio);
break;
}
if (folio_test_writeback(folio) ||
@@ -1095,9 +1094,9 @@ static int ceph_writepages_start(struct address_space *mapping,
folio_wait_private_2(folio); /* [DEPRECATED] */
}
- if (!clear_page_dirty_for_io(page)) {
- doutc(cl, "%p !clear_page_dirty_for_io\n", page);
- unlock_page(page);
+ if (!folio_clear_dirty_for_io(folio)) {
+ doutc(cl, "%p !clear_page_dirty_for_io\n", folio);
+ folio_unlock(folio);
continue;
}
@@ -1113,7 +1112,7 @@ static int ceph_writepages_start(struct address_space *mapping,
u32 xlen;
/* prepare async write request */
- offset = (u64)page_offset(page);
+ offset = folio_pos(folio);
ceph_calc_file_object_mapping(&ci->i_layout,
offset, wsize,
&objnum, &objoff,
@@ -1121,7 +1120,7 @@ static int ceph_writepages_start(struct address_space *mapping,
len = xlen;
num_ops = 1;
- strip_unit_end = page->index +
+ strip_unit_end = folio->index +
((len - 1) >> PAGE_SHIFT);
BUG_ON(pages);
@@ -1136,23 +1135,23 @@ static int ceph_writepages_start(struct address_space *mapping,
}
len = 0;
- } else if (page->index !=
+ } else if (folio->index !=
(offset + len) >> PAGE_SHIFT) {
if (num_ops >= (from_pool ? CEPH_OSD_SLAB_OPS :
CEPH_OSD_MAX_OPS)) {
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
+ folio_redirty_for_writepage(wbc, folio);
+ folio_unlock(folio);
break;
}
num_ops++;
- offset = (u64)page_offset(page);
+ offset = folio_pos(folio);
len = 0;
}
/* note position of first page in fbatch */
doutc(cl, "%llx.%llx will write page %p idx %lu\n",
- ceph_vinop(inode), page, page->index);
+ ceph_vinop(inode), folio, folio->index);
if (atomic_long_inc_return(&fsc->writeback_count) >
CONGESTION_ON_THRESH(
@@ -1161,7 +1160,7 @@ static int ceph_writepages_start(struct address_space *mapping,
if (IS_ENCRYPTED(inode)) {
pages[locked_pages] =
- fscrypt_encrypt_pagecache_blocks(page,
+ fscrypt_encrypt_pagecache_blocks(&folio->page,
PAGE_SIZE, 0,
locked_pages ? GFP_NOWAIT : GFP_NOFS);
if (IS_ERR(pages[locked_pages])) {
@@ -1172,17 +1171,17 @@ static int ceph_writepages_start(struct address_space *mapping,
/* better not fail on first page! */
BUG_ON(locked_pages == 0);
pages[locked_pages] = NULL;
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
+ folio_redirty_for_writepage(wbc, folio);
+ folio_unlock(folio);
break;
}
++locked_pages;
} else {
- pages[locked_pages++] = page;
+ pages[locked_pages++] = &folio->page;
}
fbatch.folios[i] = NULL;
- len += thp_size(page);
+ len += folio_size(folio);
}
/* did we get anything? */
@@ -1289,7 +1288,7 @@ static int ceph_writepages_start(struct address_space *mapping,
/* writepages_finish() clears writeback pages
* according to the data length, so make sure
* data length covers all locked pages */
- u64 min_len = len + 1 - thp_size(page);
+ u64 min_len = len + 1 - folio_size(folio);
len = get_writepages_data_length(inode, pages[i - 1],
offset);
len = max(len, min_len);
--
2.47.2