* [PATCH v3 0/3] io_uring: add splice(2) support
@ 2020-02-18 19:11 Pavel Begunkov
2020-02-18 19:11 ` [PATCH v3 1/3] splice: make do_splice public Pavel Begunkov
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Pavel Begunkov @ 2020-02-18 19:11 UTC (permalink / raw)
To: Jens Axboe, io-uring
Not the fastets implementation, but I'd need to stir up/duplicate
splice.c bits to do it more efficiently.
note: rebase on top of the recent inflight patchset.
v2:
- u32 len and SQE layout changes (Jens)
- output file is in sqe->fd for automatic hash_reg_file support
- handle unbound_nonreg_file for the second fd
- file leaks fixed with REQ_F_NEED_CLEANUP
- place SPLICE_F_FD_IN_FIXED in splice flags (Jens)
- loff_t* -> loff_t, -1 means not specified offset
v3: [PATCH 3/3] changes
- fd u32 -> s32 (Stefan Metzmacher)
- add BUILD_BUG_SQE_ELEM() (Stefan Metzmacher)
- accept and ignore ioprio (Stefan Metzmacher)
- off_in -> splice_off_in
Pavel Begunkov (3):
splice: make do_splice public
io_uring: add interface for getting files
io_uring: add splice(2) support
fs/io_uring.c | 175 +++++++++++++++++++++++++++++-----
fs/splice.c | 6 +-
include/linux/splice.h | 3 +
include/uapi/linux/io_uring.h | 14 ++-
4 files changed, 169 insertions(+), 29 deletions(-)
--
2.24.0
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v3 1/3] splice: make do_splice public
2020-02-18 19:11 [PATCH v3 0/3] io_uring: add splice(2) support Pavel Begunkov
@ 2020-02-18 19:11 ` Pavel Begunkov
2020-02-18 19:11 ` [PATCH v3 2/3] io_uring: add interface for getting files Pavel Begunkov
2020-02-18 19:11 ` [PATCH v3 3/3] io_uring: add splice(2) support Pavel Begunkov
2 siblings, 0 replies; 4+ messages in thread
From: Pavel Begunkov @ 2020-02-18 19:11 UTC (permalink / raw)
To: Jens Axboe, io-uring
Make do_splice(), so other kernel parts can reuse it
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
fs/splice.c | 6 +++---
include/linux/splice.h | 3 +++
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/fs/splice.c b/fs/splice.c
index 3009652a41c8..6a6f30432688 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1109,9 +1109,9 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
/*
* Determine where to splice to/from.
*/
-static long do_splice(struct file *in, loff_t __user *off_in,
- struct file *out, loff_t __user *off_out,
- size_t len, unsigned int flags)
+long do_splice(struct file *in, loff_t __user *off_in,
+ struct file *out, loff_t __user *off_out,
+ size_t len, unsigned int flags)
{
struct pipe_inode_info *ipipe;
struct pipe_inode_info *opipe;
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 74b4911ac16d..ebbbfea48aa0 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -78,6 +78,9 @@ extern ssize_t add_to_pipe(struct pipe_inode_info *,
struct pipe_buffer *);
extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
splice_direct_actor *);
+extern long do_splice(struct file *in, loff_t __user *off_in,
+ struct file *out, loff_t __user *off_out,
+ size_t len, unsigned int flags);
/*
* for dynamic pipe sizing
--
2.24.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v3 2/3] io_uring: add interface for getting files
2020-02-18 19:11 [PATCH v3 0/3] io_uring: add splice(2) support Pavel Begunkov
2020-02-18 19:11 ` [PATCH v3 1/3] splice: make do_splice public Pavel Begunkov
@ 2020-02-18 19:11 ` Pavel Begunkov
2020-02-18 19:11 ` [PATCH v3 3/3] io_uring: add splice(2) support Pavel Begunkov
2 siblings, 0 replies; 4+ messages in thread
From: Pavel Begunkov @ 2020-02-18 19:11 UTC (permalink / raw)
To: Jens Axboe, io-uring
Preparation without functional changes. Adds io_get_file(), that allows
to grab files not only into req->file.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
fs/io_uring.c | 66 ++++++++++++++++++++++++++++++++-------------------
1 file changed, 41 insertions(+), 25 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5ff7c602ad4d..389db6f5568b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1255,6 +1255,15 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
return NULL;
}
+static inline void io_put_file(struct io_ring_ctx *ctx, struct file *file,
+ bool fixed)
+{
+ if (fixed)
+ percpu_ref_put(&ctx->file_data->refs);
+ else
+ fput(file);
+}
+
static void __io_req_do_free(struct io_kiocb *req)
{
if (likely(!io_is_fallback_req(req)))
@@ -1268,12 +1277,8 @@ static void __io_req_aux_free(struct io_kiocb *req)
struct io_ring_ctx *ctx = req->ctx;
kfree(req->io);
- if (req->file) {
- if (req->flags & REQ_F_FIXED_FILE)
- percpu_ref_put(&ctx->file_data->refs);
- else
- fput(req->file);
- }
+ if (req->file)
+ io_put_file(ctx, req->file, (req->flags & REQ_F_FIXED_FILE));
io_req_work_drop_env(req);
}
@@ -4573,41 +4578,52 @@ static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
return table->files[index & IORING_FILE_TABLE_MASK];;
}
-static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
+static int io_get_file(struct io_submit_state *state, struct io_ring_ctx *ctx,
+ int fd, struct file **out_file, bool fixed)
{
- struct io_ring_ctx *ctx = req->ctx;
- unsigned flags;
- int fd;
-
- flags = READ_ONCE(sqe->flags);
- fd = READ_ONCE(sqe->fd);
-
- if (!io_req_needs_file(req, fd))
- return 0;
+ struct file *file;
- if (flags & IOSQE_FIXED_FILE) {
+ if (fixed) {
if (unlikely(!ctx->file_data ||
(unsigned) fd >= ctx->nr_user_files))
return -EBADF;
fd = array_index_nospec(fd, ctx->nr_user_files);
- req->file = io_file_from_index(ctx, fd);
- if (!req->file)
+ file = io_file_from_index(ctx, fd);
+ if (!file)
return -EBADF;
- req->flags |= REQ_F_FIXED_FILE;
percpu_ref_get(&ctx->file_data->refs);
} else {
- if (req->needs_fixed_file)
- return -EBADF;
trace_io_uring_file_get(ctx, fd);
- req->file = io_file_get(state, fd);
- if (unlikely(!req->file))
+ file = io_file_get(state, fd);
+ if (unlikely(!file))
return -EBADF;
}
+ *out_file = file;
return 0;
}
+static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ unsigned flags;
+ int fd;
+ bool fixed;
+
+ flags = READ_ONCE(sqe->flags);
+ fd = READ_ONCE(sqe->fd);
+
+ if (!io_req_needs_file(req, fd))
+ return 0;
+
+ fixed = (flags & IOSQE_FIXED_FILE);
+ if (unlikely(!fixed && req->needs_fixed_file))
+ return -EBADF;
+
+ return io_get_file(state, ctx, fd, &req->file, fixed);
+}
+
static int io_grab_files(struct io_kiocb *req)
{
int ret = -EBADF;
--
2.24.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v3 3/3] io_uring: add splice(2) support
2020-02-18 19:11 [PATCH v3 0/3] io_uring: add splice(2) support Pavel Begunkov
2020-02-18 19:11 ` [PATCH v3 1/3] splice: make do_splice public Pavel Begunkov
2020-02-18 19:11 ` [PATCH v3 2/3] io_uring: add interface for getting files Pavel Begunkov
@ 2020-02-18 19:11 ` Pavel Begunkov
2 siblings, 0 replies; 4+ messages in thread
From: Pavel Begunkov @ 2020-02-18 19:11 UTC (permalink / raw)
To: Jens Axboe, io-uring
Add support for splice(2).
- output file is specified as sqe->fd, so it's handled by generic code
- hash_reg_file handled by generic code as well
- len is 32bit, but should be fine
- the fd_in is registered file, when SPLICE_F_FD_IN_FIXED is set, which
is a splice flag (i.e. sqe->splice_flags).
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
fs/io_uring.c | 109 ++++++++++++++++++++++++++++++++++
include/uapi/linux/io_uring.h | 14 ++++-
2 files changed, 122 insertions(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 389db6f5568b..c535d870c23d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -76,6 +76,7 @@
#include <linux/fadvise.h>
#include <linux/eventpoll.h>
#include <linux/fs_struct.h>
+#include <linux/splice.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -433,6 +434,15 @@ struct io_epoll {
struct epoll_event event;
};
+struct io_splice {
+ struct file *file_out;
+ struct file *file_in;
+ loff_t off_out;
+ loff_t off_in;
+ u64 len;
+ unsigned int flags;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -546,6 +556,7 @@ struct io_kiocb {
struct io_fadvise fadvise;
struct io_madvise madvise;
struct io_epoll epoll;
+ struct io_splice splice;
};
struct io_async_ctx *io;
@@ -746,6 +757,11 @@ static const struct io_op_def io_op_defs[] = {
.unbound_nonreg_file = 1,
.file_table = 1,
},
+ [IORING_OP_SPLICE] = {
+ .needs_file = 1,
+ .hash_reg_file = 1,
+ .unbound_nonreg_file = 1,
+ }
};
static void io_wq_submit_work(struct io_wq_work **workptr);
@@ -760,6 +776,10 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
static int io_grab_files(struct io_kiocb *req);
static void io_ring_file_ref_flush(struct fixed_file_data *data);
static void io_cleanup_req(struct io_kiocb *req);
+static int io_get_file(struct io_submit_state *state,
+ struct io_ring_ctx *ctx,
+ int fd, struct file **out_file,
+ bool fixed);
static struct kmem_cache *req_cachep;
@@ -2412,6 +2432,77 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
return ret;
}
+static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_splice* sp = &req->splice;
+ unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
+ int ret;
+
+ if (req->flags & REQ_F_NEED_CLEANUP)
+ return 0;
+
+ sp->file_in = NULL;
+ sp->off_in = READ_ONCE(sqe->splice_off_in);
+ sp->off_out = READ_ONCE(sqe->off);
+ sp->len = READ_ONCE(sqe->len);
+ sp->flags = READ_ONCE(sqe->splice_flags);
+
+ if (unlikely(sp->flags & ~valid_flags))
+ return -EINVAL;
+
+ ret = io_get_file(NULL, req->ctx, READ_ONCE(sqe->splice_fd_in),
+ &sp->file_in, (sp->flags & SPLICE_F_FD_IN_FIXED));
+ if (ret)
+ return ret;
+ req->flags |= REQ_F_NEED_CLEANUP;
+
+ if (!S_ISREG(file_inode(sp->file_in)->i_mode))
+ req->work.flags |= IO_WQ_WORK_UNBOUND;
+
+ return 0;
+}
+
+static bool io_splice_punt(struct file *file)
+{
+ if (get_pipe_info(file))
+ return false;
+ if (!io_file_supports_async(file))
+ return true;
+ return !(file->f_mode & O_NONBLOCK);
+}
+
+static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
+ bool force_nonblock)
+{
+ struct io_splice *sp = &req->splice;
+ struct file *in = sp->file_in;
+ struct file *out = sp->file_out;
+ unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
+ loff_t *poff_in, *poff_out;
+ long ret;
+
+ if (force_nonblock) {
+ if (io_splice_punt(in) || io_splice_punt(out))
+ return -EAGAIN;
+ flags |= SPLICE_F_NONBLOCK;
+ }
+
+ poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
+ poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
+ ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
+ if (force_nonblock && ret == -EAGAIN)
+ return -EAGAIN;
+
+ io_put_file(req->ctx, in, (sp->flags & SPLICE_F_FD_IN_FIXED));
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+
+ io_cqring_add_event(req, ret);
+ if (ret != sp->len)
+ req_set_fail_links(req);
+ io_put_req_find_next(req, nxt);
+ return 0;
+}
+
/*
* IORING_OP_NOP just posts a completion event, nothing else.
*/
@@ -4227,6 +4318,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
case IORING_OP_EPOLL_CTL:
ret = io_epoll_ctl_prep(req, sqe);
break;
+ case IORING_OP_SPLICE:
+ ret = io_splice_prep(req, sqe);
+ break;
default:
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
req->opcode);
@@ -4289,6 +4383,10 @@ static void io_cleanup_req(struct io_kiocb *req)
case IORING_OP_STATX:
putname(req->open.filename);
break;
+ case IORING_OP_SPLICE:
+ io_put_file(req->ctx, req->splice.file_in,
+ (req->splice.flags & SPLICE_F_FD_IN_FIXED));
+ break;
}
req->flags &= ~REQ_F_NEED_CLEANUP;
@@ -4492,6 +4590,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
}
ret = io_epoll_ctl(req, nxt, force_nonblock);
break;
+ case IORING_OP_SPLICE:
+ if (sqe) {
+ ret = io_splice_prep(req, sqe);
+ if (ret < 0)
+ break;
+ }
+ ret = io_splice(req, nxt, force_nonblock);
+ break;
default:
ret = -EINVAL;
break;
@@ -7227,6 +7333,7 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(8, __u64, off);
BUILD_BUG_SQE_ELEM(8, __u64, addr2);
BUILD_BUG_SQE_ELEM(16, __u64, addr);
+ BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in);
BUILD_BUG_SQE_ELEM(24, __u32, len);
BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags);
BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags);
@@ -7241,9 +7348,11 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(28, __u32, open_flags);
BUILD_BUG_SQE_ELEM(28, __u32, statx_flags);
BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice);
+ BUILD_BUG_SQE_ELEM(28, __u32, splice_flags);
BUILD_BUG_SQE_ELEM(32, __u64, user_data);
BUILD_BUG_SQE_ELEM(40, __u16, buf_index);
BUILD_BUG_SQE_ELEM(42, __u16, personality);
+ BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 3f7961c1c243..08891cc1c1e7 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -23,7 +23,10 @@ struct io_uring_sqe {
__u64 off; /* offset into file */
__u64 addr2;
};
- __u64 addr; /* pointer to buffer or iovecs */
+ union {
+ __u64 addr; /* pointer to buffer or iovecs */
+ __u64 splice_off_in;
+ };
__u32 len; /* buffer size or number of iovecs */
union {
__kernel_rwf_t rw_flags;
@@ -37,6 +40,7 @@ struct io_uring_sqe {
__u32 open_flags;
__u32 statx_flags;
__u32 fadvise_advice;
+ __u32 splice_flags;
};
__u64 user_data; /* data to be passed back at completion time */
union {
@@ -45,6 +49,7 @@ struct io_uring_sqe {
__u16 buf_index;
/* personality to use, if used */
__u16 personality;
+ __s32 splice_fd_in;
};
__u64 __pad2[3];
};
@@ -113,6 +118,7 @@ enum {
IORING_OP_RECV,
IORING_OP_OPENAT2,
IORING_OP_EPOLL_CTL,
+ IORING_OP_SPLICE,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -128,6 +134,12 @@ enum {
*/
#define IORING_TIMEOUT_ABS (1U << 0)
+/*
+ * sqe->splice_flags
+ * extends splice(2) flags
+ */
+#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */
+
/*
* IO completion data structure (Completion Queue Entry)
*/
--
2.24.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2020-02-18 19:12 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-02-18 19:11 [PATCH v3 0/3] io_uring: add splice(2) support Pavel Begunkov
2020-02-18 19:11 ` [PATCH v3 1/3] splice: make do_splice public Pavel Begunkov
2020-02-18 19:11 ` [PATCH v3 2/3] io_uring: add interface for getting files Pavel Begunkov
2020-02-18 19:11 ` [PATCH v3 3/3] io_uring: add splice(2) support Pavel Begunkov
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.