From: David Howells <dhowells@redhat.com>
To: Jens Axboe <axboe@kernel.dk>, Al Viro <viro@zeniv.linux.org.uk>,
Christoph Hellwig <hch@infradead.org>
Cc: David Howells <dhowells@redhat.com>,
Matthew Wilcox <willy@infradead.org>, Jan Kara <jack@suse.cz>,
Jeff Layton <jlayton@kernel.org>,
David Hildenbrand <david@redhat.com>,
Jason Gunthorpe <jgg@nvidia.com>,
Logan Gunthorpe <logang@deltatee.com>,
Hillf Danton <hdanton@sina.com>,
linux-fsdevel@vger.kernel.org, linux-block@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
Daniel Golle <daniel@makrotopia.org>,
Guenter Roeck <groeck7@gmail.com>, Christoph Hellwig <hch@lst.de>,
John Hubbard <jhubbard@nvidia.com>,
Hugh Dickins <hughd@google.com>
Subject: [PATCH v14 04/17] shmem: Implement splice-read
Date: Tue, 14 Feb 2023 17:13:17 +0000 [thread overview]
Message-ID: <20230214171330.2722188-5-dhowells@redhat.com> (raw)
In-Reply-To: <20230214171330.2722188-1-dhowells@redhat.com>
The new filemap_splice_read() has an implicit expectation via
filemap_get_pages() that ->read_folio() exists if ->readahead() doesn't
fully populate the pagecache of the file it is reading from[1], potentially
leading to a jump to NULL if this doesn't exist. shmem, however (and by
extension tmpfs, ramfs and rootfs), doesn't have ->read_folio().
Work around this by equipping shmem with its own splice-read
implementation, based on filemap_splice_read(), but able to paste in
zero_page when there's a page missing.
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Daniel Golle <daniel@makrotopia.org>
cc: Guenter Roeck <groeck7@gmail.com>
cc: Christoph Hellwig <hch@lst.de>
cc: Jens Axboe <axboe@kernel.dk>
cc: Al Viro <viro@zeniv.linux.org.uk>
cc: John Hubbard <jhubbard@nvidia.com>
cc: David Hildenbrand <david@redhat.com>
cc: Matthew Wilcox <willy@infradead.org>
cc: Hugh Dickins <hughd@google.com>
cc: linux-block@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
Link: https://lore.kernel.org/r/Y+pdHFFTk1TTEBsO@makrotopia.org/ [1]
---
mm/shmem.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 123 insertions(+), 1 deletion(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 0005ab2c29af..7145a5345f4d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2711,6 +2711,128 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return retval ? retval : error;
}
+/*
+ * ->try_steal() for zero_pipe_buf_ops, whose buffers point at ZERO_PAGE(0):
+ * every steal attempt is refused.
+ */
+static bool zero_pipe_buf_try_steal(struct pipe_inode_info *pipe,
+				    struct pipe_buffer *buf)
+{
+	return false;
+}
+
+/*
+ * Operations for pipe buffers filled in by splice_zeropage_into_pipe():
+ * the generic get/release callbacks are reused and stealing goes through
+ * zero_pipe_buf_try_steal().
+ */
+static const struct pipe_buf_operations zero_pipe_buf_ops = {
+	.get		= generic_pipe_buf_get,
+	.release	= generic_pipe_buf_release,
+	.try_steal	= zero_pipe_buf_try_steal,
+};
+
+/*
+ * Append a pipe buffer that covers part of the zero page.
+ *
+ * @pipe: pipe to add the buffer to
+ * @fpos: file position being spliced; only its offset within the page is used
+ * @size: number of bytes wanted; clamped so that the buffer does not extend
+ *        past the end of the page
+ *
+ * Returns the number of bytes queued, or 0 if nothing was added because the
+ * pipe is full or no bytes were requested.
+ */
+static size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe,
+					loff_t fpos, size_t size)
+{
+	size_t offset = fpos & ~PAGE_MASK;
+	struct pipe_buffer *buf;
+
+	size = min_t(size_t, size, PAGE_SIZE - offset);
+
+	/*
+	 * Report only what was really queued: shmem_file_splice_read()
+	 * advances the file position by the returned count, so claiming
+	 * bytes for a full pipe would silently skip data.
+	 */
+	if (!size || pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+		return 0;
+
+	buf = pipe_head_buf(pipe);
+	*buf = (struct pipe_buffer) {
+		.ops	= &zero_pipe_buf_ops,
+		.page	= ZERO_PAGE(0),
+		.offset	= offset,
+		.len	= size,
+	};
+	/* Take a page reference on behalf of the new pipe buffer. */
+	get_page(buf->page);
+	pipe->head++;
+
+	return size;
+}
+
+/*
+ * Splice data from a shmem file into a pipe.
+ *
+ * Folios returned by shmem_get_folio() are spliced in directly; where it
+ * succeeds without returning a folio for a position inside i_size, the zero
+ * page is spliced instead.
+ *
+ * @in: file to read from
+ * @ppos: file position; advanced by the amount spliced
+ * @pipe: destination pipe
+ * @len: maximum number of bytes to splice
+ * @flags: splice flags (not used by this implementation)
+ *
+ * Returns the number of bytes spliced if any were; otherwise 0 or a negative
+ * error code.
+ */
+static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
+				      struct pipe_inode_info *pipe,
+				      size_t len, unsigned int flags)
+{
+	struct inode *inode = file_inode(in);
+	struct address_space *mapping = inode->i_mapping;
+	struct folio *folio = NULL;
+	size_t total_spliced = 0, used, npages, n, part;
+	loff_t isize;
+	int error = 0;
+
+	/* Work out how much data we can actually add into the pipe */
+	used = pipe_occupancy(pipe->head, pipe->tail);
+	npages = max_t(ssize_t, pipe->max_usage - used, 0);
+	len = min_t(size_t, len, npages * PAGE_SIZE);
+
+	do {
+		if (*ppos >= i_size_read(inode))
+			break;
+
+		error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio, SGP_READ);
+		if (error) {
+			/* -EINVAL just ends the read; it is not reported. */
+			if (error == -EINVAL)
+				error = 0;
+			break;
+		}
+		if (folio) {
+			folio_unlock(folio);
+
+			/* The reference is dropped after the loop. */
+			if (folio_test_hwpoison(folio)) {
+				error = -EIO;
+				break;
+			}
+		}
+
+		/*
+		 * i_size must be checked after we know the pages are Uptodate.
+		 *
+		 * Checking i_size after the check allows us to calculate
+		 * the correct value for "part", which means the zero-filled
+		 * part of the page is not spliced into the pipe (unless
+		 * another truncate extends the file - this is desired though).
+		 */
+		isize = i_size_read(inode);
+		if (unlikely(*ppos >= isize))
+			break;
+		part = min_t(loff_t, isize - *ppos, len);
+
+		if (folio) {
+			/*
+			 * If users can be writing to this page using arbitrary
+			 * virtual addresses, take care about potential aliasing
+			 * before reading the page on the kernel side.
+			 */
+			if (mapping_writably_mapped(mapping))
+				flush_dcache_folio(folio);
+			folio_mark_accessed(folio);
+			/*
+			 * Ok, we have the page, and it's up-to-date, so we can
+			 * now splice it into the pipe.
+			 */
+			n = splice_folio_into_pipe(pipe, folio, *ppos, part);
+			folio_put(folio);
+			folio = NULL;
+		} else {
+			/*
+			 * No folio here: supply zeroes, limited to "part" so
+			 * that nothing beyond i_size is spliced.
+			 */
+			n = splice_zeropage_into_pipe(pipe, *ppos, part);
+		}
+
+		if (!n)
+			break;
+		len -= n;
+		total_spliced += n;
+		*ppos += n;
+		in->f_ra.prev_pos = *ppos;
+		if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+			break;
+
+		cond_resched();
+	} while (len);
+
+	/* Drop the reference still held if the loop was left early. */
+	if (folio)
+		folio_put(folio);
+
+	file_accessed(in);
+	return total_spliced ? total_spliced : error;
+}
+
static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
{
struct address_space *mapping = file->f_mapping;
@@ -3929,7 +4051,7 @@ static const struct file_operations shmem_file_operations = {
.read_iter = shmem_file_read_iter,
.write_iter = generic_file_write_iter,
.fsync = noop_fsync,
- .splice_read = generic_file_splice_read,
+ .splice_read = shmem_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = shmem_fallocate,
#endif
next prev parent reply other threads:[~2023-02-14 17:14 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-14 17:13 [PATCH v14 00/17] iov_iter: Improve page extraction (pin or just list) David Howells
2023-02-14 17:13 ` [PATCH v14 01/17] mm: Pass info, not iter, into filemap_get_pages() David Howells
2023-02-14 17:13 ` [PATCH v14 02/17] splice: Add a func to do a splice from a buffered file without ITER_PIPE David Howells
2023-02-18 2:41 ` Ming Lei
2023-02-18 9:25 ` David Howells
2023-02-14 17:13 ` [PATCH v14 03/17] splice: Add a func to do a splice from an O_DIRECT " David Howells
2023-02-14 17:13 ` David Howells [this message]
2023-02-14 17:13 ` [PATCH v14 05/17] overlayfs: Implement splice-read David Howells
2023-02-15 14:21 ` Miklos Szeredi
2023-02-15 15:03 ` David Howells
2023-02-15 15:32 ` Miklos Szeredi
2023-02-15 15:40 ` [PATCH v15 " David Howells
2023-02-15 15:50 ` Miklos Szeredi
2023-02-15 15:53 ` Matthew Wilcox
2023-02-15 16:38 ` Christoph Hellwig
2023-02-15 16:40 ` Miklos Szeredi
2023-02-15 15:58 ` David Howells
2023-02-14 17:13 ` [PATCH v14 06/17] coda: " David Howells
2023-02-14 18:04 ` Jan Harkes
2023-02-14 17:13 ` [PATCH v14 07/17] tty, proc, kernfs, random: Use direct_splice_read() David Howells
2023-02-14 17:13 ` [PATCH v14 08/17] splice: Do splice read from a file without using ITER_PIPE David Howells
2023-02-14 17:24 ` Mezgani Ali
2023-02-17 8:22 ` Ming Lei
2023-02-17 9:18 ` Ming Lei
2023-02-17 20:39 ` Alexander Egorenkov
2023-02-17 20:59 ` egorenar
2023-02-17 21:24 ` David Howells
2023-02-17 21:16 ` David Howells
2023-02-17 21:47 ` David Howells
2023-02-18 8:29 ` Alexander Egorenkov
2023-02-18 8:29 ` Alexander Egorenkov
2023-02-18 9:18 ` David Howells
2023-02-14 17:13 ` [PATCH v14 09/17] iov_iter: Kill ITER_PIPE David Howells
2023-02-14 17:13 ` [PATCH v14 10/17] iov_iter: Define flags to qualify page extraction David Howells
2023-02-14 17:13 ` [PATCH v14 11/17] iov_iter: Add a function to extract a page list from an iterator David Howells
2023-02-14 17:13 ` [PATCH v14 12/17] iomap: Don't get an reference on ZERO_PAGE for direct I/O block zeroing David Howells
2023-02-14 17:13 ` [PATCH v14 13/17] block: Fix bio_flagged() so that gcc can better optimise it David Howells
2023-02-14 17:13 ` [PATCH v14 14/17] block: Replace BIO_NO_PAGE_REF with BIO_PAGE_REFFED with inverted logic David Howells
2023-02-14 17:13 ` [PATCH v14 15/17] block: Add BIO_PAGE_PINNED and associated infrastructure David Howells
2023-02-14 17:13 ` [PATCH v14 16/17] block: Convert bio_iov_iter_get_pages to use iov_iter_extract_pages David Howells
2023-02-14 17:13 ` [PATCH v14 17/17] block: convert bio_map_user_iov " David Howells
2023-02-14 22:56 ` [PATCH v14 00/17] iov_iter: Improve page extraction (pin or just list) David Howells
2023-02-14 23:01 ` David Howells
2023-02-14 23:01 ` David Howells
2023-02-14 23:05 ` Jens Axboe
2023-02-15 8:07 ` David Howells
2023-02-15 8:20 ` David Howells
2023-02-15 14:23 ` Christoph Hellwig
2023-02-15 14:38 ` Christoph Hellwig
2023-02-15 15:43 ` David Howells
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230214171330.2722188-5-dhowells@redhat.com \
--to=dhowells@redhat.com \
--cc=axboe@kernel.dk \
--cc=daniel@makrotopia.org \
--cc=david@redhat.com \
--cc=groeck7@gmail.com \
--cc=hch@infradead.org \
--cc=hch@lst.de \
--cc=hdanton@sina.com \
--cc=hughd@google.com \
--cc=jack@suse.cz \
--cc=jgg@nvidia.com \
--cc=jhubbard@nvidia.com \
--cc=jlayton@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=logang@deltatee.com \
--cc=viro@zeniv.linux.org.uk \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.