* [PATCH 1/2] io_uring/rsrc: allow cloning at an offset
2024-10-30 16:54 [PATCHSET 0/2] Add support for cloning partial buffer sets Jens Axboe
@ 2024-10-30 16:54 ` Jens Axboe
0 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2024-10-30 16:54 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
Right now buffer cloning is an all-or-nothing kind of thing - either the
whole table is cloned from a source to a destination ring, or nothing at
all.
However, it's not always desired to clone the whole thing. Allow for
the application to specify a source and destination offset, and a
number of buffers to clone. If the destination offset is non-zero, then
allocate sparse nodes upfront.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
include/uapi/linux/io_uring.h | 5 ++++-
io_uring/rsrc.c | 36 +++++++++++++++++++++++++++++------
2 files changed, 34 insertions(+), 7 deletions(-)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 024745283783..cc8dbe78c126 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -719,7 +719,10 @@ enum {
struct io_uring_clone_buffers {
__u32 src_fd;
__u32 flags;
- __u32 pad[6];
+ __u32 src_off;
+ __u32 dst_off;
+ __u32 nr;
+ __u32 pad[3];
};
struct io_uring_buf {
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index af60d9f597be..4c149dc42fd7 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -924,10 +924,11 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
return 0;
}
-static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx)
+static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx,
+ struct io_uring_clone_buffers *arg)
{
+ int i, ret, nbufs, off, nr;
struct io_rsrc_data data;
- int i, ret, nbufs;
/*
* Drop our own lock here. We'll setup the data we need and reference
@@ -940,11 +941,33 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
nbufs = src_ctx->buf_table.nr;
if (!nbufs)
goto out_unlock;
- ret = io_rsrc_data_alloc(&data, nbufs);
+ ret = -EINVAL;
+ if (!arg->nr)
+ arg->nr = nbufs;
+ else if (arg->nr > nbufs)
+ goto out_unlock;
+ ret = -EOVERFLOW;
+ if (check_add_overflow(arg->nr, arg->src_off, &off))
+ goto out_unlock;
+ if (off > nbufs)
+ goto out_unlock;
+ if (check_add_overflow(arg->nr, arg->dst_off, &off))
+ goto out_unlock;
+ ret = -EINVAL;
+ if (off > IORING_MAX_REG_BUFFERS)
+ goto out_unlock;
+ ret = io_rsrc_data_alloc(&data, off);
if (ret)
goto out_unlock;
- for (i = 0; i < nbufs; i++) {
+ /* fill empty/sparse nodes, if needed */
+ for (i = 0; i < arg->dst_off; i++)
+ data.nodes[i] = rsrc_empty_node;
+
+ off = arg->dst_off;
+ i = arg->src_off;
+ nr = arg->nr;
+ while (nr--) {
struct io_rsrc_node *dst_node, *src_node;
src_node = io_rsrc_node_lookup(&src_ctx->buf_table, i);
@@ -960,7 +983,8 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
refcount_inc(&src_node->buf->refs);
dst_node->buf = src_node->buf;
}
- data.nodes[i] = dst_node;
+ data.nodes[off++] = dst_node;
+ i++;
}
/* Have a ref on the bufs now, drop src lock and re-grab our own lock */
@@ -1015,7 +1039,7 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
file = io_uring_register_get_file(buf.src_fd, registered_src);
if (IS_ERR(file))
return PTR_ERR(file);
- ret = io_clone_buffers(ctx, file->private_data);
+ ret = io_clone_buffers(ctx, file->private_data, &buf);
if (!registered_src)
fput(file);
return ret;
--
2.45.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCHSET v2 0/2] Add support for cloning partial buffer sets
@ 2024-10-31 1:44 Jens Axboe
2024-10-31 1:44 ` [PATCH 1/2] io_uring/rsrc: allow cloning at an offset Jens Axboe
2024-10-31 1:44 ` [PATCH 2/2] io_uring/rsrc: allow cloning with node replacements Jens Axboe
0 siblings, 2 replies; 4+ messages in thread
From: Jens Axboe @ 2024-10-31 1:44 UTC (permalink / raw)
To: io-uring
Hi,
6.12 added buffer cloning support, but it's an all-or-nothing kind of
thing - if there's an existing buffer table in the destination ring,
then nothing can be cloned to it.
This adds support for cloning partial buffer sets, specifying a
source/dest offset and the number of buffers to clone. And it allows
cloning to replace existing nodes as well, specified with a separate
flag.
Changes since v1:
- Rebase on current tree (no rsrc_empty_node)
- Rewrite the replacement code to be much simpler
- Write more test cases
--
Jens Axboe
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/2] io_uring/rsrc: allow cloning at an offset
2024-10-31 1:44 [PATCHSET v2 0/2] Add support for cloning partial buffer sets Jens Axboe
@ 2024-10-31 1:44 ` Jens Axboe
2024-10-31 1:44 ` [PATCH 2/2] io_uring/rsrc: allow cloning with node replacements Jens Axboe
1 sibling, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2024-10-31 1:44 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
Right now buffer cloning is an all-or-nothing kind of thing - either the
whole table is cloned from a source to a destination ring, or nothing at
all.
However, it's not always desired to clone the whole thing. Allow for
the application to specify a source and destination offset, and a
number of buffers to clone. If the destination offset is non-zero, then
allocate sparse nodes upfront.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
include/uapi/linux/io_uring.h | 5 ++++-
io_uring/rsrc.c | 32 ++++++++++++++++++++++++++------
2 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 024745283783..cc8dbe78c126 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -719,7 +719,10 @@ enum {
struct io_uring_clone_buffers {
__u32 src_fd;
__u32 flags;
- __u32 pad[6];
+ __u32 src_off;
+ __u32 dst_off;
+ __u32 nr;
+ __u32 pad[3];
};
struct io_uring_buf {
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index af60d9f597be..d00870128bb9 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -924,10 +924,11 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
return 0;
}
-static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx)
+static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx,
+ struct io_uring_clone_buffers *arg)
{
+ int i, ret, nbufs, off, nr;
struct io_rsrc_data data;
- int i, ret, nbufs;
/*
* Drop our own lock here. We'll setup the data we need and reference
@@ -940,11 +941,29 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
nbufs = src_ctx->buf_table.nr;
if (!nbufs)
goto out_unlock;
- ret = io_rsrc_data_alloc(&data, nbufs);
+ ret = -EINVAL;
+ if (!arg->nr)
+ arg->nr = nbufs;
+ else if (arg->nr > nbufs)
+ goto out_unlock;
+ ret = -EOVERFLOW;
+ if (check_add_overflow(arg->nr, arg->src_off, &off))
+ goto out_unlock;
+ if (off > nbufs)
+ goto out_unlock;
+ if (check_add_overflow(arg->nr, arg->dst_off, &off))
+ goto out_unlock;
+ ret = -EINVAL;
+ if (off > IORING_MAX_REG_BUFFERS)
+ goto out_unlock;
+ ret = io_rsrc_data_alloc(&data, off);
if (ret)
goto out_unlock;
- for (i = 0; i < nbufs; i++) {
+ off = arg->dst_off;
+ i = arg->src_off;
+ nr = arg->nr;
+ while (nr--) {
struct io_rsrc_node *dst_node, *src_node;
src_node = io_rsrc_node_lookup(&src_ctx->buf_table, i);
@@ -960,7 +979,8 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
refcount_inc(&src_node->buf->refs);
dst_node->buf = src_node->buf;
}
- data.nodes[i] = dst_node;
+ data.nodes[off++] = dst_node;
+ i++;
}
/* Have a ref on the bufs now, drop src lock and re-grab our own lock */
@@ -1015,7 +1035,7 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
file = io_uring_register_get_file(buf.src_fd, registered_src);
if (IS_ERR(file))
return PTR_ERR(file);
- ret = io_clone_buffers(ctx, file->private_data);
+ ret = io_clone_buffers(ctx, file->private_data, &buf);
if (!registered_src)
fput(file);
return ret;
--
2.45.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] io_uring/rsrc: allow cloning with node replacements
2024-10-31 1:44 [PATCHSET v2 0/2] Add support for cloning partial buffer sets Jens Axboe
2024-10-31 1:44 ` [PATCH 1/2] io_uring/rsrc: allow cloning at an offset Jens Axboe
@ 2024-10-31 1:44 ` Jens Axboe
1 sibling, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2024-10-31 1:44 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
Currently cloning a buffer table will fail if the destination already has
a table. But it should be possible to use it to replace existing elements.
Add an IORING_REGISTER_DST_REPLACE cloning flag which, if set, will allow
the destination to already have a buffer table. If that is the case,
then entries designated by offset + nr buffers will be replaced if they
already exist.
Note that it's allowed to use IORING_REGISTER_DST_REPLACE and not have
an existing table, in which case it'll work just like not having the
flag set and an empty table - it'll just assign the newly created table
for that case.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
include/uapi/linux/io_uring.h | 3 +-
io_uring/rsrc.c | 66 +++++++++++++++++++++++++++--------
2 files changed, 54 insertions(+), 15 deletions(-)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index cc8dbe78c126..ce58c4590de6 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -713,7 +713,8 @@ struct io_uring_clock_register {
};
enum {
- IORING_REGISTER_SRC_REGISTERED = 1,
+ IORING_REGISTER_SRC_REGISTERED = (1U << 0),
+ IORING_REGISTER_DST_REPLACE = (1U << 1),
};
struct io_uring_clone_buffers {
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index d00870128bb9..673ff00da727 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -927,8 +927,40 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx,
struct io_uring_clone_buffers *arg)
{
- int i, ret, nbufs, off, nr;
struct io_rsrc_data data;
+ int i, ret, off, nr;
+ unsigned int nbufs;
+
+ /* if offsets are given, must have nr specified too */
+ if (!arg->nr && (arg->dst_off || arg->src_off))
+ return -EINVAL;
+ /* not allowed unless REPLACE is set */
+ if (ctx->buf_table.nr && !(arg->flags & IORING_REGISTER_DST_REPLACE))
+ return -EBUSY;
+
+ nbufs = READ_ONCE(src_ctx->buf_table.nr);
+ if (!arg->nr)
+ arg->nr = nbufs;
+ else if (arg->nr > nbufs)
+ return -EINVAL;
+ else if (arg->nr > IORING_MAX_REG_BUFFERS)
+ return -EINVAL;
+ if (check_add_overflow(arg->nr, arg->dst_off, &nbufs))
+ return -EOVERFLOW;
+
+ ret = io_rsrc_data_alloc(&data, max(nbufs, ctx->buf_table.nr));
+ if (ret)
+ return ret;
+
+ /* Fill entries in data from dst that won't overlap with src */
+ for (i = 0; i < min(arg->dst_off, ctx->buf_table.nr); i++) {
+ struct io_rsrc_node *src_node = ctx->buf_table.nodes[i];
+
+ if (src_node) {
+ data.nodes[i] = src_node;
+ src_node->refs++;
+ }
+ }
/*
* Drop our own lock here. We'll setup the data we need and reference
@@ -951,14 +983,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
goto out_unlock;
if (off > nbufs)
goto out_unlock;
- if (check_add_overflow(arg->nr, arg->dst_off, &off))
- goto out_unlock;
- ret = -EINVAL;
- if (off > IORING_MAX_REG_BUFFERS)
- goto out_unlock;
- ret = io_rsrc_data_alloc(&data, off);
- if (ret)
- goto out_unlock;
off = arg->dst_off;
i = arg->src_off;
@@ -986,6 +1010,20 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
/* Have a ref on the bufs now, drop src lock and re-grab our own lock */
mutex_unlock(&src_ctx->uring_lock);
mutex_lock(&ctx->uring_lock);
+
+ /*
+ * If asked for replace, put the old table. data->nodes[] holds both
+ * old and new nodes at this point.
+ */
+ if (arg->flags & IORING_REGISTER_DST_REPLACE)
+ io_rsrc_data_free(&ctx->buf_table);
+
+ /*
+ * ctx->buf_table should be empty now - either the contents are being
+ * replaced and we just freed the table, or someone raced setting up
+ * a buffer table while the clone was happening. If not empty, fall
+ * through to failure handling.
+ */
if (!ctx->buf_table.nr) {
ctx->buf_table = data;
return 0;
@@ -995,14 +1033,14 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
mutex_lock(&src_ctx->uring_lock);
/* someone raced setting up buffers, dump ours */
ret = -EBUSY;
- i = nbufs;
out_put_free:
+ i = data.nr;
while (i--) {
io_buffer_unmap(src_ctx, data.nodes[i]);
kfree(data.nodes[i]);
}
- io_rsrc_data_free(&data);
out_unlock:
+ io_rsrc_data_free(&data);
mutex_unlock(&src_ctx->uring_lock);
mutex_lock(&ctx->uring_lock);
return ret;
@@ -1022,12 +1060,12 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
struct file *file;
int ret;
- if (ctx->buf_table.nr)
- return -EBUSY;
if (copy_from_user(&buf, arg, sizeof(buf)))
return -EFAULT;
- if (buf.flags & ~IORING_REGISTER_SRC_REGISTERED)
+ if (buf.flags & ~(IORING_REGISTER_SRC_REGISTERED|IORING_REGISTER_DST_REPLACE))
return -EINVAL;
+ if (!(buf.flags & IORING_REGISTER_DST_REPLACE) && ctx->buf_table.nr)
+ return -EBUSY;
if (memchr_inv(buf.pad, 0, sizeof(buf.pad)))
return -EINVAL;
--
2.45.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-10-31 1:46 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2024-10-31 1:44 [PATCHSET v2 0/2] Add support for cloning partial buffer sets Jens Axboe
2024-10-31 1:44 ` [PATCH 1/2] io_uring/rsrc: allow cloning at an offset Jens Axboe
2024-10-31 1:44 ` [PATCH 2/2] io_uring/rsrc: allow cloning with node replacements Jens Axboe
-- strict thread matches above, loose matches on Subject: below --
2024-10-30 16:54 [PATCHSET 0/2] Add support for cloning partial buffer sets Jens Axboe
2024-10-30 16:54 ` [PATCH 1/2] io_uring/rsrc: allow cloning at an offset Jens Axboe
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.