* [PATCH 1/3] ceph: Make ceph_netfs_issue_op() handle inlined data (untested) @ 2022-01-17 16:26 David Howells 2022-01-17 16:26 ` [PATCH 2/3] ceph: Uninline the data on a file opened for writing David Howells 2022-01-17 16:26 ` [PATCH 3/3] ceph: Remove some other inline-setting bits David Howells 0 siblings, 2 replies; 9+ messages in thread From: David Howells @ 2022-01-17 16:26 UTC (permalink / raw) To: ceph-devel; +Cc: dhowells, jlayton, linux-fsdevel Make ceph_netfs_issue_op() handle inlined data on page 0. The code that's upstream *ought* to be doing this in ceph_readpage() as the page isn't pinned and could get discarded under memory pressure from what can be seen. Signed-off-by: David Howells <dhowells@redhat.com> cc: ceph-devel@vger.kernel.org --- fs/ceph/addr.c | 79 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 17 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index c98e5238a1b6..11273108e924 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -245,6 +245,61 @@ static void finish_netfs_read(struct ceph_osd_request *req) iput(req->r_inode); } +static bool ceph_netfs_issue_op_inline(struct netfs_read_subrequest *subreq) +{ + struct netfs_read_request *rreq = subreq->rreq; + struct inode *inode = rreq->inode; + struct ceph_mds_reply_info_parsed *rinfo; + struct ceph_mds_reply_info_in *iinfo; + struct ceph_mds_request *req; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_inode_info *ci = ceph_inode(inode); + struct iov_iter iter; + ssize_t err = 0; + size_t len; + + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + __clear_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags); + + if (subreq->start >= inode->i_size || subreq->start >= 4096) + goto out; + + /* We need to fetch the inline data. 
*/ + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto out; + } + req->r_ino1 = ci->i_vino; + req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA); + req->r_num_caps = 2; + + err = ceph_mdsc_do_request(mdsc, NULL, req); + if (err < 0) + goto out; + + rinfo = &req->r_reply_info; + iinfo = &rinfo->targeti; + if (iinfo->inline_version == CEPH_INLINE_NONE) { + /* The data got uninlined */ + ceph_mdsc_put_request(req); + return false; + } + + len = min_t(size_t, 4096 - subreq->start, iinfo->inline_len); + iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len); + + err = copy_to_iter(iinfo->inline_data, len, &iter); + if (err == 0) + err = -EFAULT; + + ceph_mdsc_put_request(req); + +out: + netfs_subreq_terminated(subreq, err, false); + return true; +} + static void ceph_netfs_issue_op(struct netfs_read_subrequest *subreq) { struct netfs_read_request *rreq = subreq->rreq; @@ -259,6 +314,10 @@ static void ceph_netfs_issue_op(struct netfs_read_subrequest *subreq) int err = 0; u64 len = subreq->len; + if (ci->i_inline_version != CEPH_INLINE_NONE && + ceph_netfs_issue_op_inline(subreq)) + return; + req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len, 0, 1, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica, @@ -327,23 +386,9 @@ static int ceph_readpage(struct file *file, struct page *subpage) size_t len = folio_size(folio); u64 off = folio_file_pos(folio); - if (ci->i_inline_version != CEPH_INLINE_NONE) { - /* - * Uptodate inline data should have been added - * into page cache while getting Fcr caps. 
- */ - if (off == 0) { - folio_unlock(folio); - return -EINVAL; - } - zero_user_segment(&folio->page, 0, folio_size(folio)); - folio_mark_uptodate(folio); - folio_unlock(folio); - return 0; - } - - dout("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n", - vino.ino, vino.snap, file, off, len, folio, folio_index(folio)); + if (ci->i_inline_version == CEPH_INLINE_NONE) + dout("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n", + vino.ino, vino.snap, file, off, len, folio, folio_index(folio)); return netfs_readpage(file, folio, &ceph_netfs_read_ops, NULL); } ^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/3] ceph: Uninline the data on a file opened for writing 2022-01-17 16:26 [PATCH 1/3] ceph: Make ceph_netfs_issue_op() handle inlined data (untested) David Howells @ 2022-01-17 16:26 ` David Howells 2022-01-17 16:47 ` Matthew Wilcox 2022-01-17 16:59 ` David Howells 2022-01-17 16:26 ` [PATCH 3/3] ceph: Remove some other inline-setting bits David Howells 1 sibling, 2 replies; 9+ messages in thread From: David Howells @ 2022-01-17 16:26 UTC (permalink / raw) To: ceph-devel; +Cc: dhowells, jlayton, linux-fsdevel If a ceph file is made up of inline data, uninline that in the ceph_open() rather than in ceph_page_mkwrite(), ceph_write_iter(), ceph_fallocate() or ceph_write_begin(). This makes it easier to convert to using the netfs library for VM write hooks. Changes ======= ver #3: - Move the patch to make ceph_netfs_issue_op() before this one so that reading inline data is handled there. - Call read_mapping_folio() to allocate and cause the page to be read instead of reading inline data directly there. - Assume that we can just dump the uninlined data into the pagecache, no matter the caps. ver #2: - Removed the uninline-handling code from ceph_write_begin() also. 
Signed-off-by: David Howells <dhowells@redhat.com> cc: ceph-devel@vger.kernel.org --- fs/ceph/addr.c | 135 +++++++++++++------------------------------------------ fs/ceph/file.c | 28 +++++++---- fs/ceph/super.h | 2 - 3 files changed, 50 insertions(+), 115 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 11273108e924..10837587f7db 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1319,45 +1319,11 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = file_inode(file); - struct ceph_inode_info *ci = ceph_inode(inode); struct folio *folio = NULL; - pgoff_t index = pos >> PAGE_SHIFT; int r; - /* - * Uninlining should have already been done and everything updated, EXCEPT - * for inline_version sent to the MDS. - */ - if (ci->i_inline_version != CEPH_INLINE_NONE) { - unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE; - if (aop_flags & AOP_FLAG_NOFS) - fgp_flags |= FGP_NOFS; - folio = __filemap_get_folio(mapping, index, fgp_flags, - mapping_gfp_mask(mapping)); - if (!folio) - return -ENOMEM; - - /* - * The inline_version on a new inode is set to 1. If that's the - * case, then the folio is brand new and isn't yet Uptodate. 
- */ - r = 0; - if (index == 0 && ci->i_inline_version != 1) { - if (!folio_test_uptodate(folio)) { - WARN_ONCE(1, "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n", - ci->i_inline_version); - r = -EINVAL; - } - goto out; - } - zero_user_segment(&folio->page, 0, folio_size(folio)); - folio_mark_uptodate(folio); - goto out; - } - r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &folio, NULL, &ceph_netfs_read_ops, NULL); -out: if (r == 0) folio_wait_fscache(folio); if (r < 0) { @@ -1553,19 +1519,6 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) sb_start_pagefault(inode->i_sb); ceph_block_sigs(&oldset); - if (ci->i_inline_version != CEPH_INLINE_NONE) { - struct page *locked_page = NULL; - if (off == 0) { - lock_page(page); - locked_page = page; - } - err = ceph_uninline_data(vma->vm_file, locked_page); - if (locked_page) - unlock_page(locked_page); - if (err < 0) - goto out_free; - } - if (off + thp_size(page) <= size) len = thp_size(page); else @@ -1690,16 +1643,16 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, } } -int ceph_uninline_data(struct file *filp, struct page *locked_page) +int ceph_uninline_data(struct file *file) { - struct inode *inode = file_inode(filp); + struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_osd_request *req; - struct page *page = NULL; + struct folio *folio = NULL; + struct page *pages[1]; u64 len, inline_version; int err = 0; - bool from_pagecache = false; spin_lock(&ci->i_ceph_lock); inline_version = ci->i_inline_version; @@ -1712,43 +1665,24 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) inline_version == CEPH_INLINE_NONE) goto out; - if (locked_page) { - page = locked_page; - WARN_ON(!PageUptodate(page)); - } else if (ceph_caps_issued(ci) & - (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) { - page = find_get_page(inode->i_mapping, 
0); - if (page) { - if (PageUptodate(page)) { - from_pagecache = true; - lock_page(page); - } else { - put_page(page); - page = NULL; - } - } - } + folio = read_mapping_folio(inode->i_mapping, 0, file); + if (IS_ERR(folio)) + goto out; - if (page) { - len = i_size_read(inode); - if (len > PAGE_SIZE) - len = PAGE_SIZE; - } else { - page = __page_cache_alloc(GFP_NOFS); - if (!page) { - err = -ENOMEM; - goto out; - } - err = __ceph_do_getattr(inode, page, - CEPH_STAT_CAP_INLINE_DATA, true); - if (err < 0) { - /* no inline data */ - if (err == -ENODATA) - err = 0; - goto out; - } - len = err; - } + if (folio_test_uptodate(folio)) + goto out_put_folio; + + err = folio_lock_killable(folio); + if (err < 0) + goto out_put_folio; + + if (inline_version == 1 || /* initial version, no data */ + inline_version == CEPH_INLINE_NONE) + goto out_unlock; + + len = i_size_read(inode); + if (len > folio_size(folio)) + len = folio_size(folio); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), 0, &len, 0, 1, @@ -1756,7 +1690,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) NULL, 0, 0, false); if (IS_ERR(req)) { err = PTR_ERR(req); - goto out; + goto out_unlock; } req->r_mtime = inode->i_mtime; @@ -1765,7 +1699,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) err = ceph_osdc_wait_request(&fsc->client->osdc, req); ceph_osdc_put_request(req); if (err < 0) - goto out; + goto out_unlock; req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), 0, &len, 1, 3, @@ -1774,10 +1708,11 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ci->i_truncate_size, false); if (IS_ERR(req)) { err = PTR_ERR(req); - goto out; + goto out_unlock; } - osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); + pages[0] = folio_page(folio, 0); + osd_req_op_extent_osd_data_pages(req, 1, pages, len, 0, false, false); { __le64 xattr_buf = cpu_to_le64(inline_version); @@ -1787,7 
+1722,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) CEPH_OSD_CMPXATTR_OP_GT, CEPH_OSD_CMPXATTR_MODE_U64); if (err) - goto out_put; + goto out_put_req; } { @@ -1798,7 +1733,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) "inline_version", xattr_buf, xattr_len, 0, 0); if (err) - goto out_put; + goto out_put_req; } req->r_mtime = inode->i_mtime; @@ -1809,19 +1744,15 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency, req->r_end_latency, len, err); -out_put: +out_put_req: ceph_osdc_put_request(req); if (err == -ECANCELED) err = 0; +out_unlock: + folio_unlock(folio); +out_put_folio: + folio_put(folio); out: - if (page && page != locked_page) { - if (from_pagecache) { - unlock_page(page); - put_page(page); - } else - __free_pages(page, 0); - } - dout("uninline_data %p %llx.%llx inline_version %llu = %d\n", inode, ceph_vinop(inode), inline_version, err); return err; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 9d9304e712d9..d1d28220f691 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -205,6 +205,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi; + int ret; dout("%s %p %p 0%o (%s)\n", __func__, inode, file, inode->i_mode, isdir ? 
"dir" : "regular"); @@ -235,7 +236,22 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, INIT_LIST_HEAD(&fi->rw_contexts); fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen); + if ((file->f_mode & FMODE_WRITE) && + ci->i_inline_version != CEPH_INLINE_NONE) { + ret = ceph_uninline_data(file); + if (ret < 0) + goto error; + } + return 0; + +error: + ceph_fscache_unuse_cookie(inode, file->f_mode & FMODE_WRITE); + ceph_put_fmode(ci, fi->fmode, 1); + kmem_cache_free(ceph_file_cachep, fi); + /* wake up anyone waiting for caps on this inode */ + wake_up_all(&ci->i_cap_wq); + return ret; } /* @@ -1763,12 +1779,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) goto out; - if (ci->i_inline_version != CEPH_INLINE_NONE) { - err = ceph_uninline_data(file, NULL); - if (err < 0) - goto out; - } - dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n", inode, ceph_vinop(inode), pos, count, i_size_read(inode)); if (fi->fmode & CEPH_FILE_MODE_LAZY) @@ -2094,12 +2104,6 @@ static long ceph_fallocate(struct file *file, int mode, goto unlock; } - if (ci->i_inline_version != CEPH_INLINE_NONE) { - ret = ceph_uninline_data(file, NULL); - if (ret < 0) - goto unlock; - } - size = i_size_read(inode); /* Are we punching a hole beyond EOF? 
*/ diff --git a/fs/ceph/super.h b/fs/ceph/super.h index d0142cc5c41b..f1cec05e4eb8 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1207,7 +1207,7 @@ extern void __ceph_touch_fmode(struct ceph_inode_info *ci, /* addr.c */ extern const struct address_space_operations ceph_aops; extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); -extern int ceph_uninline_data(struct file *filp, struct page *locked_page); +extern int ceph_uninline_data(struct file *file); extern int ceph_pool_perm_check(struct inode *inode, int need); extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc); int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate); ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 2/3] ceph: Uninline the data on a file opened for writing 2022-01-17 16:26 ` [PATCH 2/3] ceph: Uninline the data on a file opened for writing David Howells @ 2022-01-17 16:47 ` Matthew Wilcox 2022-01-21 11:09 ` Jeff Layton 2022-01-17 16:59 ` David Howells 1 sibling, 1 reply; 9+ messages in thread From: Matthew Wilcox @ 2022-01-17 16:47 UTC (permalink / raw) To: David Howells; +Cc: ceph-devel, jlayton, linux-fsdevel On Mon, Jan 17, 2022 at 04:26:36PM +0000, David Howells wrote: > + folio = read_mapping_folio(inode->i_mapping, 0, file); > + if (IS_ERR(folio)) > + goto out; ... you need to set 'err' here, right? > + if (folio_test_uptodate(folio)) > + goto out_put_folio; Er ... if (!folio_test_uptodate(folio)), perhaps? And is it even worth testing if read_mapping_folio() returned success? I feel like we should take ->readpage()'s word for it that success means the folio is now uptodate. > + err = folio_lock_killable(folio); > + if (err < 0) > + goto out_put_folio; > + > + if (inline_version == 1 || /* initial version, no data */ > + inline_version == CEPH_INLINE_NONE) > + goto out_unlock; > + > + len = i_size_read(inode); > + if (len > folio_size(folio)) extra space. Plus, you're hardcoding 4096 below, but using folio_size() here which is a bit weird to me. ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/3] ceph: Uninline the data on a file opened for writing 2022-01-17 16:47 ` Matthew Wilcox @ 2022-01-21 11:09 ` Jeff Layton 0 siblings, 0 replies; 9+ messages in thread From: Jeff Layton @ 2022-01-21 11:09 UTC (permalink / raw) To: Matthew Wilcox, David Howells; +Cc: ceph-devel, linux-fsdevel On Mon, 2022-01-17 at 16:47 +0000, Matthew Wilcox wrote: > On Mon, Jan 17, 2022 at 04:26:36PM +0000, David Howells wrote: > > + folio = read_mapping_folio(inode->i_mapping, 0, file); > > + if (IS_ERR(folio)) > > + goto out; > > ... you need to set 'err' here, right? > > > + if (folio_test_uptodate(folio)) > > + goto out_put_folio; > > Er ... if (!folio_test_uptodate(folio)), perhaps? And is it even > worth testing if read_mapping_folio() returned success? I feel like > we should take ->readpage()'s word for it that success means the > folio is now uptodate. > > > + err = folio_lock_killable(folio); > > + if (err < 0) > > + goto out_put_folio; > > + > > + if (inline_version == 1 || /* initial version, no data */ > > + inline_version == CEPH_INLINE_NONE) > > + goto out_unlock; > > + > > + len = i_size_read(inode); > > + if (len > folio_size(folio)) > > extra space. Plus, you're hardcoding 4096 below, but using folio_size() > here which is a bit weird to me. The client actually decides how big a region to inline when it does this. The default is 4k, but someone could inline up to 64k (and potentially larger, if they tweak settings the right way). I'd suggest not capping the length in this function at all. If you find more than 4k, just assume that some other client stashed more data than expected, and uninline whatever is there. You might also consider throwing in a pr_warn or something in that case, since finding more than 4k uninlined would be unexpected (though not necessarily broken). -- Jeff Layton <jlayton@kernel.org> ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/3] ceph: Uninline the data on a file opened for writing 2022-01-17 16:26 ` [PATCH 2/3] ceph: Uninline the data on a file opened for writing David Howells 2022-01-17 16:47 ` Matthew Wilcox @ 2022-01-17 16:59 ` David Howells 2022-01-17 17:15 ` Jeff Layton ` (2 more replies) 1 sibling, 3 replies; 9+ messages in thread From: David Howells @ 2022-01-17 16:59 UTC (permalink / raw) To: Matthew Wilcox; +Cc: dhowells, ceph-devel, jlayton, linux-fsdevel Matthew Wilcox <willy@infradead.org> wrote: > > + if (folio_test_uptodate(folio)) > > + goto out_put_folio; > > Er ... if (!folio_test_uptodate(folio)), perhaps? And is it even > worth testing if read_mapping_folio() returned success? I feel like > we should take ->readpage()'s word for it that success means the > folio is now uptodate. Actually, no, I shouldn't need to do this since it comes out with the page lock still held. > > + len = i_size_read(inode); > > + if (len > folio_size(folio)) > > extra space. Plus, you're hardcoding 4096 below, but using folio_size() > here which is a bit weird to me. As I understand it, 4096 is the maximum length of the inline data, not PAGE_SIZE, so I have to be careful when doing a DIO read because it might start after the data - and there's also truncate to consider:-/ I wonder if the uninlining code should lock the inode while it does it and the truncation code should do uninlining too. David ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/3] ceph: Uninline the data on a file opened for writing 2022-01-17 16:59 ` David Howells @ 2022-01-17 17:15 ` Jeff Layton 2022-01-17 17:50 ` Matthew Wilcox 2022-01-21 14:03 ` Jeff Layton 2 siblings, 0 replies; 9+ messages in thread From: Jeff Layton @ 2022-01-17 17:15 UTC (permalink / raw) To: David Howells, Matthew Wilcox; +Cc: ceph-devel, linux-fsdevel On Mon, 2022-01-17 at 16:59 +0000, David Howells wrote: > Matthew Wilcox <willy@infradead.org> wrote: > > > > + if (folio_test_uptodate(folio)) > > > + goto out_put_folio; > > > > Er ... if (!folio_test_uptodate(folio)), perhaps? And is it even > > worth testing if read_mapping_folio() returned success? I feel like > > we should take ->readpage()'s word for it that success means the > > folio is now uptodate. > > Actually, no, I shouldn't need to do this since it comes out with the page > lock still held. > > > > + len = i_size_read(inode); > > > + if (len > folio_size(folio)) > > > > extra space. Plus, you're hardcoding 4096 below, but using folio_size() > > here which is a bit weird to me. > > As I understand it, 4096 is the maximum length of the inline data, not > PAGE_SIZE, so I have to be careful when doing a DIO read because it might > start after the data - and there's also truncate to consider:-/ > The default is 4k for the userland client, and the kernel client had it hardcoded at 4k (though it seemed to swap in PAGE_SIZE in random places in the code). I think the MDS allows the client to inline an arbitrary size of data but there are probably some limits I don't see. I have no idea how the client is intended to discover this value... The whole inlining feature was somewhat half-baked, IMNSHO. > I wonder if the uninlining code should lock the inode while it does it and the > truncation code should do uninlining too. > Probably. This code is on the way out of ceph and (eventually) the kernel, so I'm not inclined to worry too much about subtle bugs in here. 
-- Jeff Layton <jlayton@kernel.org> ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/3] ceph: Uninline the data on a file opened for writing 2022-01-17 16:59 ` David Howells 2022-01-17 17:15 ` Jeff Layton @ 2022-01-17 17:50 ` Matthew Wilcox 2022-01-21 14:03 ` Jeff Layton 2 siblings, 0 replies; 9+ messages in thread From: Matthew Wilcox @ 2022-01-17 17:50 UTC (permalink / raw) To: David Howells; +Cc: ceph-devel, jlayton, linux-fsdevel On Mon, Jan 17, 2022 at 04:59:35PM +0000, David Howells wrote: > Matthew Wilcox <willy@infradead.org> wrote: > > > > + if (folio_test_uptodate(folio)) > > > + goto out_put_folio; > > > > Er ... if (!folio_test_uptodate(folio)), perhaps? And is it even > > worth testing if read_mapping_folio() returned success? I feel like > > we should take ->readpage()'s word for it that success means the > > folio is now uptodate. > > Actually, no, I shouldn't need to do this since it comes out with the page > lock still held. No. The page is unlocked upon IO completion. After calling ->readpage, folio_wait_locked(folio); ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/3] ceph: Uninline the data on a file opened for writing 2022-01-17 16:59 ` David Howells 2022-01-17 17:15 ` Jeff Layton 2022-01-17 17:50 ` Matthew Wilcox @ 2022-01-21 14:03 ` Jeff Layton 2 siblings, 0 replies; 9+ messages in thread From: Jeff Layton @ 2022-01-21 14:03 UTC (permalink / raw) To: David Howells, Matthew Wilcox; +Cc: ceph-devel, linux-fsdevel On Mon, 2022-01-17 at 16:59 +0000, David Howells wrote: > Matthew Wilcox <willy@infradead.org> wrote: > > > > + if (folio_test_uptodate(folio)) > > > + goto out_put_folio; > > > > Er ... if (!folio_test_uptodate(folio)), perhaps? And is it even > > worth testing if read_mapping_folio() returned success? I feel like > > we should take ->readpage()'s word for it that success means the > > folio is now uptodate. > > Actually, no, I shouldn't need to do this since it comes out with the page > lock still held. > > > > + len = i_size_read(inode); > > > + if (len > folio_size(folio)) > > > > extra space. Plus, you're hardcoding 4096 below, but using folio_size() > > here which is a bit weird to me. > > As I understand it, 4096 is the maximum length of the inline data, not > PAGE_SIZE, so I have to be careful when doing a DIO read because it might > start after the data - and there's also truncate to consider:-/ > > I wonder if the uninlining code should lock the inode while it does it and the > truncation code should do uninlining too. > It probably should have done uninlining on truncate(). OTOH, the old code never did this, so I'm not inclined to add that in. I'd feel differently if we were looking to keep the inline_data as a feature, long-term, but we aren't. The plan is to keep this feature limping along in the kclient until the last ceph release that supports inline_data is no longer supported in the field, at which point we'll rip support out altogether. We officially marked it deprecated in the Octopus release, and I was hoping to remove it from ceph in Quincy (which should ship this spring). 
We still need the cephfs-scrub utility to walk the fs and uninline any files that are still inlined so we can support people on upgrades. That work isn't finished yet. See: https://tracker.ceph.com/issues/52916 -- Jeff Layton <jlayton@kernel.org> ^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 3/3] ceph: Remove some other inline-setting bits 2022-01-17 16:26 [PATCH 1/3] ceph: Make ceph_netfs_issue_op() handle inlined data (untested) David Howells 2022-01-17 16:26 ` [PATCH 2/3] ceph: Uninline the data on a file opened for writing David Howells @ 2022-01-17 16:26 ` David Howells 1 sibling, 0 replies; 9+ messages in thread From: David Howells @ 2022-01-17 16:26 UTC (permalink / raw) To: ceph-devel; +Cc: dhowells, jlayton, linux-fsdevel Remove some other bits where a ceph file can't be inline because we uninlined it when we opened it for writing. Signed-off-by: David Howells <dhowells@redhat.com> --- fs/ceph/addr.c | 4 +--- fs/ceph/file.c | 4 ---- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 10837587f7db..a2a03d4c28fb 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1575,11 +1575,9 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) ceph_put_snap_context(snapc); } while (err == 0); - if (ret == VM_FAULT_LOCKED || - ci->i_inline_version != CEPH_INLINE_NONE) { + if (ret == VM_FAULT_LOCKED) { int dirty; spin_lock(&ci->i_ceph_lock); - ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, &prealloc_cf); spin_unlock(&ci->i_ceph_lock); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index d1d28220f691..114d8c7d5aab 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1043,7 +1043,6 @@ static void ceph_aio_complete(struct inode *inode, } spin_lock(&ci->i_ceph_lock); - ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, &aio_req->prealloc_cf); spin_unlock(&ci->i_ceph_lock); @@ -1850,7 +1849,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) int dirty; spin_lock(&ci->i_ceph_lock); - ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, &prealloc_cf); spin_unlock(&ci->i_ceph_lock); @@ -2128,7 +2126,6 @@ static long ceph_fallocate(struct file *file, 
int mode, if (!ret) { spin_lock(&ci->i_ceph_lock); - ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, &prealloc_cf); spin_unlock(&ci->i_ceph_lock); @@ -2521,7 +2518,6 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, } /* Mark Fw dirty */ spin_lock(&dst_ci->i_ceph_lock); - dst_ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(dst_ci, CEPH_CAP_FILE_WR, &prealloc_cf); spin_unlock(&dst_ci->i_ceph_lock); if (dirty) ^ permalink raw reply related [flat|nested] 9+ messages in thread
end of thread, other threads:[~2022-01-21 14:04 UTC | newest] Thread overview: 9+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2022-01-17 16:26 [PATCH 1/3] ceph: Make ceph_netfs_issue_op() handle inlined data (untested) David Howells 2022-01-17 16:26 ` [PATCH 2/3] ceph: Uninline the data on a file opened for writing David Howells 2022-01-17 16:47 ` Matthew Wilcox 2022-01-21 11:09 ` Jeff Layton 2022-01-17 16:59 ` David Howells 2022-01-17 17:15 ` Jeff Layton 2022-01-17 17:50 ` Matthew Wilcox 2022-01-21 14:03 ` Jeff Layton 2022-01-17 16:26 ` [PATCH 3/3] ceph: Remove some other inline-setting bits David Howells
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).