* [PATCH net-next v6 04/14] splice, net: Add a splice_eof op to file-ops and socket-ops
[not found] <20230607181920.2294972-1-dhowells@redhat.com>
@ 2023-06-07 18:19 ` David Howells
2023-06-07 18:19 ` [PATCH net-next v6 10/14] splice, net: Fix SPLICE_F_MORE signalling in splice_direct_to_actor() David Howells
1 sibling, 0 replies; 2+ messages in thread
From: David Howells @ 2023-06-07 18:19 UTC (permalink / raw)
To: netdev, Linus Torvalds
Cc: David Howells, Chuck Lever, Boris Pismenny, John Fastabend,
Jakub Kicinski, David S. Miller, Eric Dumazet, Paolo Abeni,
Willem de Bruijn, David Ahern, Matthew Wilcox, Jens Axboe,
linux-mm, linux-kernel, Christoph Hellwig, Al Viro, Jan Kara,
Jeff Layton, David Hildenbrand, Christian Brauner, linux-fsdevel,
linux-block
Add an optional method, ->splice_eof(), to allow splice to indicate the
premature termination of a splice to struct file_operations and struct
proto_ops.
This is called if sendfile() or splice() encounters all of the following
conditions inside splice_direct_to_actor():
(1) the user did not set SPLICE_F_MORE (splice only), and
(2) an EOF condition occurred (->splice_read() returned 0), and
(3) we haven't read enough to fulfill the request (ie. len > 0 still), and
(4) we have already spliced at least one byte.
A further patch will modify the behaviour of SPLICE_F_MORE to always be
passed to the actor if either the user set it or we haven't yet read
sufficient data to fulfill the request.
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/CAHk-=wh=V579PDYvkpnTobCLGczbgxpMgGmmhqiTyE34Cpi5Gg@mail.gmail.com/
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
cc: Jens Axboe <axboe@kernel.dk>
cc: Christoph Hellwig <hch@lst.de>
cc: Al Viro <viro@zeniv.linux.org.uk>
cc: Matthew Wilcox <willy@infradead.org>
cc: Jan Kara <jack@suse.cz>
cc: Jeff Layton <jlayton@kernel.org>
cc: David Hildenbrand <david@redhat.com>
cc: Christian Brauner <brauner@kernel.org>
cc: Chuck Lever <chuck.lever@oracle.com>
cc: Boris Pismenny <borisp@nvidia.com>
cc: John Fastabend <john.fastabend@gmail.com>
cc: Eric Dumazet <edumazet@google.com>
cc: "David S. Miller" <davem@davemloft.net>
cc: Paolo Abeni <pabeni@redhat.com>
cc: linux-fsdevel@vger.kernel.org
cc: linux-block@vger.kernel.org
cc: linux-mm@kvack.org
cc: netdev@vger.kernel.org
---
fs/splice.c | 31 ++++++++++++++++++++++++++++++-
include/linux/fs.h | 1 +
include/linux/net.h | 1 +
include/linux/splice.h | 1 +
include/net/sock.h | 1 +
net/socket.c | 10 ++++++++++
6 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/fs/splice.c b/fs/splice.c
index e337630aed64..67dbd85db207 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -969,6 +969,17 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
return out->f_op->splice_write(pipe, out, ppos, len, flags);
}
+/*
+ * Indicate to the caller that there was a premature EOF when reading from the
+ * source and the caller didn't indicate they would be sending more data after
+ * this.
+ */
+static void do_splice_eof(struct splice_desc *sd)
+{
+ if (sd->splice_eof)
+ sd->splice_eof(sd);
+}
+
/*
* Attempt to initiate a splice from a file to a pipe.
*/
@@ -1068,7 +1079,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
ret = do_splice_to(in, &pos, pipe, len, flags);
if (unlikely(ret <= 0))
- goto out_release;
+ goto read_failure;
read_len = ret;
sd->total_len = read_len;
@@ -1108,6 +1119,15 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
file_accessed(in);
return bytes;
+read_failure:
+ /*
+ * If the user did *not* set SPLICE_F_MORE *and* we didn't hit that
+ * "use all of len" case that cleared SPLICE_F_MORE, *and* we did a
+ * "->splice_in()" that returned EOF (ie zero) *and* we have sent at
+ * least 1 byte *then* we will also do the ->splice_eof() call.
+ */
+ if (ret == 0 && !more && len > 0 && bytes)
+ do_splice_eof(sd);
out_release:
/*
* If we did an incomplete transfer we must release
@@ -1136,6 +1156,14 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
sd->flags);
}
+static void direct_file_splice_eof(struct splice_desc *sd)
+{
+ struct file *file = sd->u.file;
+
+ if (file->f_op->splice_eof)
+ file->f_op->splice_eof(file);
+}
+
/**
* do_splice_direct - splices data directly between two files
* @in: file to splice from
@@ -1161,6 +1189,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
.flags = flags,
.pos = *ppos,
.u.file = out,
+ .splice_eof = direct_file_splice_eof,
.opos = opos,
};
long ret;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index df92f4b3d122..de2cb1132f07 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1796,6 +1796,7 @@ struct file_operations {
int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
+ void (*splice_eof)(struct file *file);
int (*setlease)(struct file *, long, struct file_lock **, void **);
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
diff --git a/include/linux/net.h b/include/linux/net.h
index b73ad8e3c212..8defc8f1d82e 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -210,6 +210,7 @@ struct proto_ops {
int offset, size_t size, int flags);
ssize_t (*splice_read)(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len, unsigned int flags);
+ void (*splice_eof)(struct socket *sock);
int (*set_peek_off)(struct sock *sk, int val);
int (*peek_len)(struct socket *sock);
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 991ae318b6eb..4fab18a6e371 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -38,6 +38,7 @@ struct splice_desc {
struct file *file; /* file to read/write */
void *data; /* cookie */
} u;
+ void (*splice_eof)(struct splice_desc *sd); /* Unexpected EOF handler */
loff_t pos; /* file position */
loff_t *opos; /* sendfile: output position */
size_t num_spliced; /* number of bytes already spliced */
diff --git a/include/net/sock.h b/include/net/sock.h
index b418425d7230..ae2d74a0bc4c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1271,6 +1271,7 @@ struct proto {
size_t len, int flags, int *addr_len);
int (*sendpage)(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
+ void (*splice_eof)(struct socket *sock);
int (*bind)(struct sock *sk,
struct sockaddr *addr, int addr_len);
int (*bind_add)(struct sock *sk,
diff --git a/net/socket.c b/net/socket.c
index c4d9104418c8..b778fc03c6e0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -130,6 +130,7 @@ static int sock_fasync(int fd, struct file *filp, int on);
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
+static void sock_splice_eof(struct file *file);
#ifdef CONFIG_PROC_FS
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
@@ -163,6 +164,7 @@ static const struct file_operations socket_file_ops = {
.fasync = sock_fasync,
.splice_write = splice_to_socket,
.splice_read = sock_splice_read,
+ .splice_eof = sock_splice_eof,
.show_fdinfo = sock_show_fdinfo,
};
@@ -1076,6 +1078,14 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
return sock->ops->splice_read(sock, ppos, pipe, len, flags);
}
+static void sock_splice_eof(struct file *file)
+{
+ struct socket *sock = file->private_data;
+
+ if (sock->ops->splice_eof)
+ sock->ops->splice_eof(sock);
+}
+
static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH net-next v6 10/14] splice, net: Fix SPLICE_F_MORE signalling in splice_direct_to_actor()
[not found] <20230607181920.2294972-1-dhowells@redhat.com>
2023-06-07 18:19 ` [PATCH net-next v6 04/14] splice, net: Add a splice_eof op to file-ops and socket-ops David Howells
@ 2023-06-07 18:19 ` David Howells
1 sibling, 0 replies; 2+ messages in thread
From: David Howells @ 2023-06-07 18:19 UTC (permalink / raw)
To: netdev, Linus Torvalds
Cc: David Howells, Chuck Lever, Boris Pismenny, John Fastabend,
Jakub Kicinski, David S. Miller, Eric Dumazet, Paolo Abeni,
Willem de Bruijn, David Ahern, Matthew Wilcox, Jens Axboe,
linux-mm, linux-kernel, Christoph Hellwig, Al Viro, Jan Kara,
Jeff Layton, David Hildenbrand, Christian Brauner, linux-fsdevel,
linux-block
splice_direct_to_actor() doesn't manage SPLICE_F_MORE correctly[1] - and,
as a result, it incorrectly signals/fails to signal MSG_MORE when splicing
to a socket. The problem I'm seeing happens when a short splice occurs
because we got a short read due to hitting the EOF on a file: as the length
read (read_len) is less than the remaining size to be spliced (len),
SPLICE_F_MORE (and thus MSG_MORE) is set.
The issue is that, for the moment, we have no way to know *why* the short
read occurred and so can't make a good decision on whether we *should* keep
MSG_MORE set.
MSG_SENDPAGE_NOTLAST was added to work around this, but that is also set
incorrectly under some circumstances - for example if a short read fills a
single pipe_buffer, but the next read would return more (seqfile can do
this).
This was observed with the multi_chunk_sendfile tests in the tls kselftest
program. Some of those tests would hang and time out when the last chunk
of file was less than the sendfile request size:
build/kselftest/net/tls -r tls.12_aes_gcm.multi_chunk_sendfile
This has been observed before[2] and worked around in AF_TLS[3].
Fix this by making splice_direct_to_actor() always signal SPLICE_F_MORE if
we haven't yet hit the requested operation size. SPLICE_F_MORE remains
signalled if the user passed it in to splice() but otherwise gets cleared
when we've read sufficient data to fulfill the request.
If, however, we get a premature EOF from ->splice_read(), have sent at
least one byte and SPLICE_F_MORE was not set by the caller, ->splice_eof()
will be invoked.
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Linus Torvalds <torvalds@linux-foundation.org>
cc: Jakub Kicinski <kuba@kernel.org>
cc: Jens Axboe <axboe@kernel.dk>
cc: Christoph Hellwig <hch@lst.de>
cc: Al Viro <viro@zeniv.linux.org.uk>
cc: Matthew Wilcox <willy@infradead.org>
cc: Jan Kara <jack@suse.cz>
cc: Jeff Layton <jlayton@kernel.org>
cc: David Hildenbrand <david@redhat.com>
cc: Christian Brauner <brauner@kernel.org>
cc: Chuck Lever <chuck.lever@oracle.com>
cc: Boris Pismenny <borisp@nvidia.com>
cc: John Fastabend <john.fastabend@gmail.com>
cc: Eric Dumazet <edumazet@google.com>
cc: "David S. Miller" <davem@davemloft.net>
cc: Paolo Abeni <pabeni@redhat.com>
cc: linux-fsdevel@vger.kernel.org
cc: linux-block@vger.kernel.org
cc: linux-mm@kvack.org
cc: netdev@vger.kernel.org
Link: https://lore.kernel.org/r/499791.1685485603@warthog.procyon.org.uk/ [1]
Link: https://lore.kernel.org/r/1591392508-14592-1-git-send-email-pooja.trivedi@stackpath.com/ [2]
Link: https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit/?id=d452d48b9f8b1a7f8152d33ef52cfd7fe1735b0a [3]
---
Notes:
ver #4)
- Use ->splice_eof() to signal a premature EOF to the splice output.
fs/splice.c | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/fs/splice.c b/fs/splice.c
index 67dbd85db207..67ddaac1f5c5 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1063,13 +1063,17 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
*/
bytes = 0;
len = sd->total_len;
+
+ /* Don't block on output, we have to drain the direct pipe. */
flags = sd->flags;
+ sd->flags &= ~SPLICE_F_NONBLOCK;
/*
- * Don't block on output, we have to drain the direct pipe.
+ * We signal MORE until we've read sufficient data to fulfill the
+ * request and we keep signalling it if the caller set it.
*/
- sd->flags &= ~SPLICE_F_NONBLOCK;
more = sd->flags & SPLICE_F_MORE;
+ sd->flags |= SPLICE_F_MORE;
WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail));
@@ -1085,14 +1089,12 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
sd->total_len = read_len;
/*
- * If more data is pending, set SPLICE_F_MORE
- * If this is the last data and SPLICE_F_MORE was not set
- * initially, clears it.
+ * If we now have sufficient data to fulfill the request then
+ * we clear SPLICE_F_MORE if it was not set initially.
*/
- if (read_len < len)
- sd->flags |= SPLICE_F_MORE;
- else if (!more)
+ if (read_len >= len && !more)
sd->flags &= ~SPLICE_F_MORE;
+
/*
* NOTE: nonblocking mode only applies to the input. We
* must not do the output in nonblocking mode as then we
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-06-07 18:21 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20230607181920.2294972-1-dhowells@redhat.com>
2023-06-07 18:19 ` [PATCH net-next v6 04/14] splice, net: Add a splice_eof op to file-ops and socket-ops David Howells
2023-06-07 18:19 ` [PATCH net-next v6 10/14] splice, net: Fix SPLICE_F_MORE signalling in splice_direct_to_actor() David Howells
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).