From: David Howells <dhowells@redhat.com>
To: Steve French <smfrench@gmail.com>
Cc: David Howells <dhowells@redhat.com>, Jens Axboe <axboe@kernel.dk>,
Al Viro <viro@zeniv.linux.org.uk>,
Shyam Prasad N <nspmangalore@gmail.com>,
Rohith Surabattula <rohiths.msft@gmail.com>,
Tom Talpey <tom@talpey.com>, Stefan Metzmacher <metze@samba.org>,
Christoph Hellwig <hch@infradead.org>,
Matthew Wilcox <willy@infradead.org>,
Jeff Layton <jlayton@kernel.org>,
linux-cifs@vger.kernel.org, linux-block@vger.kernel.org,
linux-fsdevel@vger.kernel.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org, Christoph Hellwig <hch@lst.de>,
David Hildenbrand <david@redhat.com>,
John Hubbard <jhubbard@nvidia.com>
Subject: [PATCH 02/17] splice: Add a func to do a splice from a buffered file without ITER_PIPE
Date: Thu, 16 Feb 2023 21:47:30 +0000 [thread overview]
Message-ID: <20230216214745.3985496-3-dhowells@redhat.com> (raw)
In-Reply-To: <20230216214745.3985496-1-dhowells@redhat.com>
Provide a function to do splice read from a buffered file, pulling the
folios out of the pagecache directly by calling filemap_get_pages() to do
any required reading and then pasting the returned folios into the pipe.
A helper function is provided to do the actual folio pasting and will
handle multipage folios by splicing as many of the relevant subpages as
will fit into the pipe.
The code is loosely based on filemap_read() and might belong in
mm/filemap.c with that as it needs to use filemap_get_pages().
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
cc: Christoph Hellwig <hch@lst.de>
cc: Al Viro <viro@zeniv.linux.org.uk>
cc: David Hildenbrand <david@redhat.com>
cc: John Hubbard <jhubbard@nvidia.com>
cc: linux-mm@kvack.org
cc: linux-block@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
---
include/linux/fs.h | 3 ++
mm/filemap.c | 128 +++++++++++++++++++++++++++++++++++++++++++++
mm/internal.h | 6 +++
3 files changed, 137 insertions(+)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c1769a2c5d70..28743e38df91 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3163,6 +3163,9 @@ ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
struct iov_iter *iter);
/* fs/splice.c */
+ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags);
extern ssize_t generic_file_splice_read(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
diff --git a/mm/filemap.c b/mm/filemap.c
index 876e77278d2a..8c7b135c8e23 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -42,6 +42,8 @@
#include <linux/ramfs.h>
#include <linux/page_idle.h>
#include <linux/migrate.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"
@@ -2842,6 +2844,132 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
}
EXPORT_SYMBOL(generic_file_read_iter);
+/*
+ * Splice subpages from a folio into a pipe.
+ */
+size_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
+ struct folio *folio, loff_t fpos, size_t size)
+{
+ struct page *page;
+ size_t spliced = 0, offset = offset_in_folio(folio, fpos);
+
+ page = folio_page(folio, offset / PAGE_SIZE);
+ size = min(size, folio_size(folio) - offset);
+ offset %= PAGE_SIZE;
+
+ while (spliced < size &&
+ !pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
+ struct pipe_buffer *buf = pipe_head_buf(pipe);
+ size_t part = min_t(size_t, PAGE_SIZE - offset, size - spliced);
+
+ *buf = (struct pipe_buffer) {
+ .ops = &page_cache_pipe_buf_ops,
+ .page = page,
+ .offset = offset,
+ .len = part,
+ };
+ folio_get(folio);
+ pipe->head++;
+ page++;
+ spliced += part;
+ offset = 0;
+ }
+
+ return spliced;
+}
+
+/*
+ * Splice folios from the pagecache of a buffered (ie. non-O_DIRECT) file into
+ * a pipe.
+ */
+ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ struct folio_batch fbatch;
+ struct kiocb iocb;
+ size_t total_spliced = 0, used, npages;
+ loff_t isize, end_offset;
+ bool writably_mapped;
+ int i, error = 0;
+
+ init_sync_kiocb(&iocb, in);
+ iocb.ki_pos = *ppos;
+
+ /* Work out how much data we can actually add into the pipe */
+ used = pipe_occupancy(pipe->head, pipe->tail);
+ npages = max_t(ssize_t, pipe->max_usage - used, 0);
+ len = min_t(size_t, len, npages * PAGE_SIZE);
+
+ folio_batch_init(&fbatch);
+
+ do {
+ cond_resched();
+
+ if (*ppos >= i_size_read(file_inode(in)))
+ break;
+
+ iocb.ki_pos = *ppos;
+ error = filemap_get_pages(&iocb, len, &fbatch, true);
+ if (error < 0)
+ break;
+
+ /*
+ * i_size must be checked after we know the pages are Uptodate.
+ *
+ * Checking i_size after the check allows us to calculate
+ * the correct value for "nr", which means the zero-filled
+ * part of the page is not copied back to userspace (unless
+ * another truncate extends the file - this is desired though).
+ */
+ isize = i_size_read(file_inode(in));
+ if (unlikely(*ppos >= isize))
+ break;
+ end_offset = min_t(loff_t, isize, *ppos + len);
+
+ /*
+ * Once we start copying data, we don't want to be touching any
+ * cachelines that might be contended:
+ */
+ writably_mapped = mapping_writably_mapped(in->f_mapping);
+
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i];
+ size_t n;
+
+ if (folio_pos(folio) >= end_offset)
+ goto out;
+ folio_mark_accessed(folio);
+
+ /*
+ * If users can be writing to this folio using arbitrary
+ * virtual addresses, take care of potential aliasing
+ * before reading the folio on the kernel side.
+ */
+ if (writably_mapped)
+ flush_dcache_folio(folio);
+
+ n = splice_folio_into_pipe(pipe, folio, *ppos, len);
+ if (!n)
+ goto out;
+ len -= n;
+ total_spliced += n;
+ *ppos += n;
+ in->f_ra.prev_pos = *ppos;
+ if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+ goto out;
+ }
+
+ folio_batch_release(&fbatch);
+ } while (len);
+
+out:
+ folio_batch_release(&fbatch);
+ file_accessed(in);
+
+ return total_spliced ? total_spliced : error;
+}
+
static inline loff_t folio_seek_hole_data(struct xa_state *xas,
struct address_space *mapping, struct folio *folio,
loff_t start, loff_t end, bool seek_data)
diff --git a/mm/internal.h b/mm/internal.h
index bcf75a8b032d..6d4ca98f3844 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -794,6 +794,12 @@ struct migration_target_control {
gfp_t gfp_mask;
};
+/*
+ * mm/filemap.c
+ */
+size_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
+ struct folio *folio, loff_t fpos, size_t size);
+
/*
* mm/vmalloc.c
*/
next prev parent reply other threads:[~2023-02-16 21:49 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-16 21:47 [PATCH 00/17] smb3: Use iov_iters down to the network transport and fix DIO page pinning David Howells
2023-02-16 21:47 ` [PATCH 01/17] mm: Pass info, not iter, into filemap_get_pages() David Howells
2023-02-16 21:47 ` David Howells [this message]
2023-02-16 21:47 ` [PATCH 03/17] splice: Add a func to do a splice from an O_DIRECT file without ITER_PIPE David Howells
2023-02-17 9:00 ` David Howells
2023-02-16 21:47 ` [PATCH 04/17] iov_iter: Define flags to qualify page extraction David Howells
2023-02-16 21:47 ` [PATCH 05/17] iov_iter: Add a function to extract a page list from an iterator David Howells
2023-02-16 21:47 ` [PATCH 06/17] splice: Export filemap/direct_splice_read() David Howells
2023-02-16 21:47 ` [PATCH 07/17] cifs: Implement splice_read to pass down ITER_BVEC not ITER_PIPE David Howells
2023-02-16 21:47 ` [PATCH 08/17] netfs: Add a function to extract a UBUF or IOVEC into a BVEC iterator David Howells
2023-02-16 21:47 ` [PATCH 09/17] netfs: Add a function to extract an iterator into a scatterlist David Howells
2023-02-16 21:47 ` [PATCH 10/17] cifs: Add a function to build an RDMA SGE list from an iterator David Howells
2023-02-16 21:47 ` [PATCH 11/17] cifs: Add a function to Hash the contents of " David Howells
2023-02-17 3:04 ` Eric Biggers
2023-02-16 21:47 ` [PATCH 12/17] cifs: Add some helper functions David Howells
2023-02-16 21:47 ` [PATCH 13/17] cifs: Add a function to read into an iter from a socket David Howells
2023-02-16 21:47 ` [PATCH 14/17] cifs: Change the I/O paths to use an iterator rather than a page list David Howells
2023-02-17 5:48 ` Steve French
2023-02-17 8:08 ` David Howells
2023-02-17 17:48 ` Steve French
2023-02-16 21:47 ` [PATCH 15/17] cifs: Build the RDMA SGE list directly from an iterator David Howells
2023-02-16 21:47 ` [PATCH 16/17] cifs: Remove unused code David Howells
2023-02-16 21:47 ` [PATCH 17/17] cifs: DIO to/from KVEC-type iterators should now work David Howells
2023-02-17 5:52 ` [PATCH 00/17] smb3: Use iov_iters down to the network transport and fix DIO page pinning Steve French
2023-02-17 8:22 ` David Howells
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230216214745.3985496-3-dhowells@redhat.com \
--to=dhowells@redhat.com \
--cc=axboe@kernel.dk \
--cc=david@redhat.com \
--cc=hch@infradead.org \
--cc=hch@lst.de \
--cc=jhubbard@nvidia.com \
--cc=jlayton@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-cifs@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=metze@samba.org \
--cc=nspmangalore@gmail.com \
--cc=rohiths.msft@gmail.com \
--cc=smfrench@gmail.com \
--cc=tom@talpey.com \
--cc=viro@zeniv.linux.org.uk \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox