From: David Howells <dhowells@redhat.com>
To: Jens Axboe <axboe@kernel.dk>, Al Viro <viro@zeniv.linux.org.uk>,
Christoph Hellwig <hch@infradead.org>
Cc: David Howells <dhowells@redhat.com>,
Matthew Wilcox <willy@infradead.org>, Jan Kara <jack@suse.cz>,
Jeff Layton <jlayton@kernel.org>,
David Hildenbrand <david@redhat.com>,
Jason Gunthorpe <jgg@nvidia.com>,
Logan Gunthorpe <logang@deltatee.com>,
Hillf Danton <hdanton@sina.com>,
linux-fsdevel@vger.kernel.org, linux-block@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
Christoph Hellwig <hch@lst.de>,
John Hubbard <jhubbard@nvidia.com>
Subject: [PATCH v13 03/12] splice: Do splice read from a buffered file without using ITER_PIPE
Date: Thu, 9 Feb 2023 10:29:45 +0000 [thread overview]
Message-ID: <20230209102954.528942-4-dhowells@redhat.com> (raw)
In-Reply-To: <20230209102954.528942-1-dhowells@redhat.com>
Provide a function to do splice read from a buffered file, pulling the
folios out of the pagecache directly by calling filemap_get_pages() to do
any required reading and then pasting the returned folios into the pipe.
A helper function is provided to do the actual folio pasting and will
handle multipage folios by splicing as many of the relevant subpages as
will fit into the pipe.
The ITER_BVEC-based splicing previously added is then only used for
splicing from O_DIRECT files.
The code is loosely based on filemap_read() and might belong alongside it
in mm/filemap.c, since it needs to use filemap_get_pages().
With this, ITER_PIPE is no longer used.
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jens Axboe <axboe@kernel.dk>
cc: Christoph Hellwig <hch@lst.de>
cc: Al Viro <viro@zeniv.linux.org.uk>
cc: David Hildenbrand <david@redhat.com>
cc: John Hubbard <jhubbard@nvidia.com>
cc: linux-mm@kvack.org
cc: linux-block@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
---
fs/splice.c | 159 ++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 135 insertions(+), 24 deletions(-)
diff --git a/fs/splice.c b/fs/splice.c
index b4be6fc314a1..963cbf20abc8 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -22,6 +22,7 @@
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/pagemap.h>
+#include <linux/pagevec.h>
#include <linux/splice.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
@@ -375,6 +376,135 @@ static ssize_t generic_file_direct_splice_read(struct file *in, loff_t *ppos,
return ret;
}
+/*
+ * Paste part of a folio into a pipe, one pipe buffer per subpage.
+ *
+ * Starting at the byte of @folio that corresponds to file position @fpos, up
+ * to @size bytes are attached to @pipe as page_cache_pipe_buf_ops buffers.
+ * The span is trimmed so that it does not run past the end of the folio, and
+ * pasting stops early if the pipe fills up.  A reference is taken on the
+ * folio for every buffer that is added.
+ *
+ * Returns the number of bytes that were added to the pipe, which may be zero.
+ */
+static size_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
+				     struct folio *folio,
+				     loff_t fpos, size_t size)
+{
+	size_t start = offset_in_folio(folio, fpos);
+	size_t want = min(size, folio_size(folio) - start);
+	struct page *subpage = folio_page(folio, start / PAGE_SIZE);
+	size_t in_page = start % PAGE_SIZE;
+	size_t done = 0;
+
+	/* Only the first subpage can start at a non-zero offset. */
+	for (; done < want; subpage++, in_page = 0) {
+		struct pipe_buffer *slot;
+		size_t chunk;
+
+		if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+			break;
+
+		slot = &pipe->bufs[pipe->head & (pipe->ring_size - 1)];
+		chunk = min_t(size_t, PAGE_SIZE - in_page, want - done);
+
+		*slot = (struct pipe_buffer) {
+			.ops	= &page_cache_pipe_buf_ops,
+			.page	= subpage,
+			.offset	= in_page,
+			.len	= chunk,
+		};
+		folio_get(folio);
+		pipe->head++;
+		done += chunk;
+	}
+
+	return done;
+}
+
+/*
+ * Splice folios from the pagecache of a buffered (ie. non-O_DIRECT) file into
+ * a pipe.
+ *
+ * @in:    file to read from
+ * @ppos:  file position to start at; advanced by the number of bytes spliced
+ * @pipe:  pipe to add the buffers to
+ * @len:   maximum number of bytes to splice
+ * @flags: splice flags (not consulted by this function)
+ *
+ * The request is first trimmed to the number of free slots in the pipe.
+ * Batches of folios are then obtained with filemap_get_pages() and pasted
+ * into the pipe with splice_folio_into_pipe() until the request has been
+ * satisfied, the pipe is full, EOF is reached or an error occurs.  No more
+ * than i_size - *ppos bytes are ever pasted, so data lying beyond EOF in the
+ * last folio is not exposed through the pipe.
+ *
+ * Returns the number of bytes spliced if that is non-zero; otherwise the last
+ * result of filemap_get_pages() (negative on failure), or 0 if it was never
+ * called because *ppos was already at or beyond EOF.
+ */
+static ssize_t generic_file_buffered_splice_read(struct file *in, loff_t *ppos,
+						 struct pipe_inode_info *pipe,
+						 size_t len,
+						 unsigned int flags)
+{
+	struct folio_batch fbatch;
+	size_t total_spliced = 0, used, npages;
+	loff_t isize, end_offset;
+	bool writably_mapped;
+	int i, error = 0;
+
+	struct kiocb iocb = {
+		.ki_filp	= in,
+		.ki_pos		= *ppos,
+	};
+
+	/* Work out how much data we can actually add into the pipe */
+	used = pipe_occupancy(pipe->head, pipe->tail);
+	npages = max_t(ssize_t, pipe->max_usage - used, 0);
+	len = min_t(size_t, len, npages * PAGE_SIZE);
+
+	folio_batch_init(&fbatch);
+
+	do {
+		cond_resched();
+
+		if (*ppos >= i_size_read(file_inode(in)))
+			break;
+
+		iocb.ki_pos = *ppos;
+		error = filemap_get_pages(&iocb, len, &fbatch, true);
+		if (error < 0)
+			break;
+
+		/*
+		 * i_size must be checked after we know the pages are Uptodate.
+		 *
+		 * Sampling i_size after filemap_get_pages() has returned lets
+		 * us work out how much of each folio lies within the file, so
+		 * that the zero-filled part beyond EOF is not spliced into the
+		 * pipe (unless another truncate extends the file - this is
+		 * desired though).
+		 */
+		isize = i_size_read(file_inode(in));
+		if (unlikely(*ppos >= isize))
+			break;
+		end_offset = min_t(loff_t, isize, *ppos + len);
+
+		/*
+		 * Once we start copying data, we don't want to be touching any
+		 * cachelines that might be contended:
+		 */
+		writably_mapped = mapping_writably_mapped(in->f_mapping);
+
+		for (i = 0; i < folio_batch_count(&fbatch); i++) {
+			struct folio *folio = fbatch.folios[i];
+			size_t n;
+
+			if (folio_pos(folio) >= end_offset)
+				goto out;
+			folio_mark_accessed(folio);
+
+			/*
+			 * If users can be writing to this folio using arbitrary
+			 * virtual addresses, take care of potential aliasing
+			 * before reading the folio on the kernel side.
+			 */
+			if (writably_mapped)
+				flush_dcache_folio(folio);
+
+			/*
+			 * Don't paste anything past EOF: limit the amount to
+			 * what remains of both the request and the file.
+			 */
+			n = min_t(loff_t, len, isize - *ppos);
+			n = splice_folio_into_pipe(pipe, folio, *ppos, n);
+			if (!n)
+				goto out;
+			len -= n;
+			total_spliced += n;
+			*ppos += n;
+			in->f_ra.prev_pos = *ppos;
+			if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+				goto out;
+		}
+
+		folio_batch_release(&fbatch);
+	} while (len);
+
+out:
+	/* Also drops any folios left in the batch by an early exit above. */
+	folio_batch_release(&fbatch);
+	file_accessed(in);
+
+	return total_spliced ? total_spliced : error;
+}
+
/**
* generic_file_splice_read - splice data from file to a pipe
* @in: file to splice from
@@ -392,32 +522,13 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
- struct iov_iter to;
- struct kiocb kiocb;
- int ret;
-
+ if (unlikely(*ppos >= file_inode(in)->i_sb->s_maxbytes))
+ return 0;
+ if (unlikely(!len))
+ return 0;
if (in->f_flags & O_DIRECT)
return generic_file_direct_splice_read(in, ppos, pipe, len, flags);
-
- iov_iter_pipe(&to, ITER_DEST, pipe, len);
- init_sync_kiocb(&kiocb, in);
- kiocb.ki_pos = *ppos;
- ret = call_read_iter(in, &kiocb, &to);
- if (ret > 0) {
- *ppos = kiocb.ki_pos;
- file_accessed(in);
- } else if (ret < 0) {
- /* free what was emitted */
- pipe_discard_from(pipe, to.start_head);
- /*
- * callers of ->splice_read() expect -EAGAIN on
- * "can't put anything in there", rather than -EFAULT.
- */
- if (ret == -EFAULT)
- ret = -EAGAIN;
- }
-
- return ret;
+ return generic_file_buffered_splice_read(in, ppos, pipe, len, flags);
}
EXPORT_SYMBOL(generic_file_splice_read);
next prev parent reply other threads:[~2023-02-09 10:31 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-09 10:29 [PATCH v13 00/12] iov_iter: Improve page extraction (pin or just list) David Howells
2023-02-09 10:29 ` [PATCH v13 01/12] splice: Fix O_DIRECT file read splice to avoid reversion of ITER_PIPE David Howells
2023-02-09 14:53 ` Matthew Wilcox
2023-02-09 15:06 ` David Howells
2023-02-09 16:15 ` [PATCH v14 " David Howells
2023-02-13 8:22 ` Christoph Hellwig
2023-02-15 13:17 ` David Howells
2023-02-15 14:24 ` Christoph Hellwig
2023-02-15 15:56 ` David Howells
2023-02-15 13:42 ` [PATCH] splice: Clean up direct_splice_read() a bit David Howells
2023-02-15 13:47 ` David Howells
2023-02-09 10:29 ` [PATCH v13 02/12] mm: Pass info, not iter, into filemap_get_pages() and unstatic it David Howells
2023-02-13 8:22 ` Christoph Hellwig
2023-02-09 10:29 ` David Howells [this message]
2023-02-13 8:28 ` [PATCH v13 03/12] splice: Do splice read from a buffered file without using ITER_PIPE Christoph Hellwig
2023-02-13 10:11 ` David Howells
2023-02-13 10:18 ` Christoph Hellwig
2023-02-13 11:15 ` David Howells
2023-02-13 14:44 ` Christoph Hellwig
2023-02-13 18:06 ` Guenter Roeck
2023-02-13 22:43 ` David Howells
2023-02-13 22:51 ` Guenter Roeck
2023-02-13 23:12 ` David Howells
2023-02-13 23:25 ` Guenter Roeck
2023-02-09 10:29 ` [PATCH v13 04/12] iov_iter: Kill ITER_PIPE David Howells
2023-02-13 8:28 ` Christoph Hellwig
2023-02-09 10:29 ` [PATCH v13 05/12] iov_iter: Define flags to qualify page extraction David Howells
2023-02-09 10:29 ` [PATCH v13 06/12] iov_iter: Add a function to extract a page list from an iterator David Howells
2023-02-09 10:29 ` [PATCH v13 07/12] iomap: Don't get an reference on ZERO_PAGE for direct I/O block zeroing David Howells
2023-02-09 10:29 ` [PATCH v13 08/12] block: Fix bio_flagged() so that gcc can better optimise it David Howells
2023-02-09 10:29 ` [PATCH v13 09/12] block: Replace BIO_NO_PAGE_REF with BIO_PAGE_REFFED with inverted logic David Howells
2023-02-09 10:29 ` [PATCH v13 10/12] block: Add BIO_PAGE_PINNED and associated infrastructure David Howells
2023-02-09 10:29 ` [PATCH v13 11/12] block: Convert bio_iov_iter_get_pages to use iov_iter_extract_pages David Howells
2023-02-09 10:29 ` [PATCH v13 12/12] block: convert bio_map_user_iov " David Howells
2023-02-10 22:31 ` [PATCH v13 00/12] iov_iter: Improve page extraction (pin or just list) Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230209102954.528942-4-dhowells@redhat.com \
--to=dhowells@redhat.com \
--cc=axboe@kernel.dk \
--cc=david@redhat.com \
--cc=hch@infradead.org \
--cc=hch@lst.de \
--cc=hdanton@sina.com \
--cc=jack@suse.cz \
--cc=jgg@nvidia.com \
--cc=jhubbard@nvidia.com \
--cc=jlayton@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=logang@deltatee.com \
--cc=viro@zeniv.linux.org.uk \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).