From: Pavel Begunkov <asml.silence@gmail.com>
To: David Wei <dw@davidwei.uk>,
io-uring@vger.kernel.org, netdev@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Subject: Re: [PATCH v3 3/3] io_uring/zcrx: share an ifq between rings
Date: Mon, 27 Oct 2025 11:47:51 +0000 [thread overview]
Message-ID: <60f630cf-0057-4675-afcd-2b4e46430a44@gmail.com> (raw)
In-Reply-To: <309cb5ce-b19a-47b8-ba82-e75f69fe5bb3@gmail.com>
On 10/27/25 10:20, Pavel Begunkov wrote:
> On 10/26/25 17:34, David Wei wrote:
>> Add a way to share an ifq from a src ring that is real i.e. bound to a
>> HW RX queue with other rings. This is done by passing a new flag
>> IORING_ZCRX_IFQ_REG_SHARE in the registration struct
>> io_uring_zcrx_ifq_reg, alongside the fd of the src ring and the ifq id
>> to be shared.
>>
>> To prevent the src ring or ifq from being cleaned up or freed while
>> there are still shared ifqs, take the appropriate refs on the src ring
>> (ctx->refs) and src ifq (ifq->refs).
>>
>> Signed-off-by: David Wei <dw@davidwei.uk>
>> ---
>> include/uapi/linux/io_uring.h | 4 ++
>> io_uring/zcrx.c | 74 ++++++++++++++++++++++++++++++++++-
>> 2 files changed, 76 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>> index 04797a9b76bc..4da4552a4215 100644
>> --- a/include/uapi/linux/io_uring.h
>> +++ b/include/uapi/linux/io_uring.h
>> @@ -1063,6 +1063,10 @@ struct io_uring_zcrx_area_reg {
>> __u64 __resv2[2];
>> };
>> +enum io_uring_zcrx_ifq_reg_flags {
>> + IORING_ZCRX_IFQ_REG_SHARE = 1,
>> +};
>> +
>> /*
>> * Argument for IORING_REGISTER_ZCRX_IFQ
>> */
>> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
>> index 569cc0338acb..7418c959390a 100644
>> --- a/io_uring/zcrx.c
>> +++ b/io_uring/zcrx.c
>> @@ -22,10 +22,10 @@
>> #include <uapi/linux/io_uring.h>
>> #include "io_uring.h"
>> -#include "kbuf.h"
>> #include "memmap.h"
>> #include "zcrx.h"
>> #include "rsrc.h"
>> +#include "register.h"
>> #define IO_ZCRX_AREA_SUPPORTED_FLAGS (IORING_ZCRX_AREA_DMABUF)
>> @@ -541,6 +541,67 @@ struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx,
>> return ifq ? &ifq->region : NULL;
>> }
>> +static int io_share_zcrx_ifq(struct io_ring_ctx *ctx,
>> + struct io_uring_zcrx_ifq_reg __user *arg,
>> + struct io_uring_zcrx_ifq_reg *reg)
>> +{
>> + struct io_ring_ctx *src_ctx;
>> + struct io_zcrx_ifq *src_ifq;
>> + struct file *file;
>> + int src_fd, ret;
>> + u32 src_id, id;
>> +
>> + src_fd = reg->if_idx;
>> + src_id = reg->if_rxq;
>> +
>> + file = io_uring_register_get_file(src_fd, false);
>> + if (IS_ERR(file))
>> + return PTR_ERR(file);
>> +
>> + src_ctx = file->private_data;
>> + if (src_ctx == ctx)
>> + return -EBADFD;
>> +
>> + mutex_unlock(&ctx->uring_lock);
>> + io_lock_two_rings(ctx, src_ctx);
>> +
>> + ret = -EINVAL;
>> + src_ifq = xa_load(&src_ctx->zcrx_ctxs, src_id);
>> + if (!src_ifq)
>> + goto err_unlock;
>> +
>> + percpu_ref_get(&src_ctx->refs);
>> + refcount_inc(&src_ifq->refs);
>> +
>> + scoped_guard(mutex, &ctx->mmap_lock) {
>> + ret = xa_alloc(&ctx->zcrx_ctxs, &id, NULL, xa_limit_31b, GFP_KERNEL);
>> + if (ret)
>> + goto err_unlock;
>> +
>> + ret = -ENOMEM;
>> + if (xa_store(&ctx->zcrx_ctxs, id, src_ifq, GFP_KERNEL)) {
>> + xa_erase(&ctx->zcrx_ctxs, id);
>> + goto err_unlock;
>> + }
>
> It's just xa_alloc(..., src_ifq, ...);
>
>> + }
>> +
>> + reg->zcrx_id = id;
>> + if (copy_to_user(arg, reg, sizeof(*reg))) {
>> + ret = -EFAULT;
>> + goto err;
>> + }
>
> Better to do that before publishing zcrx into ctx->zcrx_ctxs
>
>> + mutex_unlock(&src_ctx->uring_lock);
>> + fput(file);
>> + return 0;
>> +err:
>> + scoped_guard(mutex, &ctx->mmap_lock)
>> + xa_erase(&ctx->zcrx_ctxs, id);
>> +err_unlock:
>> + mutex_unlock(&src_ctx->uring_lock);
>> + fput(file);
>> + return ret;
>> +}
>> +
>> int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
>> struct io_uring_zcrx_ifq_reg __user *arg)
>> {
>> @@ -566,6 +627,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
>> return -EINVAL;
>> if (copy_from_user(&reg, arg, sizeof(reg)))
>> return -EFAULT;
>> + if (reg.flags & IORING_ZCRX_IFQ_REG_SHARE)
>> + return io_share_zcrx_ifq(ctx, arg, &reg);
>> if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd)))
>> return -EFAULT;
>> if (!mem_is_zero(&reg.__resv, sizeof(reg.__resv)) ||
>> @@ -663,7 +726,7 @@ void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
>> if (ifq)
>> xa_erase(&ctx->zcrx_ctxs, id);
>> }
>> - if (!ifq)
>> + if (!ifq || ctx != ifq->ctx)
>> break;
>> io_zcrx_ifq_free(ifq);
>> }
>> @@ -734,6 +797,13 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
>> if (xa_get_mark(&ctx->zcrx_ctxs, index, XA_MARK_0))
>> continue;
>> + /*
>> + * Only shared ifqs want to put ctx->refs on the owning ifq
>> + * ring. This matches the get in io_share_zcrx_ifq().
>> + */
>> + if (ctx != ifq->ctx)
>> + percpu_ref_put(&ifq->ctx->refs);
>
> After you put this and ifq->refs below down, the zcrx object can get
> destroyed, but this ctx might still have requests using the object.
> Waiting on ctx refs would ensure requests are killed, but that'd
> create a cycle.
Another concerning part is long-term cross-ctx referencing,
which is even worse than the pp locking it up. I mentioned
that it'd be great to reverse the refcounting relation,
but that'd also need additional groundwork to break
dependencies.
>
>> +
>> /* Safe to clean up from any ring. */
>> if (refcount_dec_and_test(&ifq->refs)) {
>> io_zcrx_scrub(ifq);
>
--
Pavel Begunkov
next prev parent reply other threads:[~2025-10-27 11:47 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-26 17:34 [PATCH v3 0/3] io_uring zcrx ifq sharing David Wei
2025-10-26 17:34 ` [PATCH v3 1/3] io_uring/rsrc: rename and export io_lock_two_rings() David Wei
2025-10-27 10:04 ` Pavel Begunkov
2025-10-28 14:54 ` David Wei
2025-10-28 15:19 ` Pavel Begunkov
2025-10-26 17:34 ` [PATCH v3 2/3] io_uring/zcrx: add refcount to struct io_zcrx_ifq David Wei
2025-10-26 17:34 ` [PATCH v3 3/3] io_uring/zcrx: share an ifq between rings David Wei
2025-10-27 10:20 ` Pavel Begunkov
2025-10-27 11:47 ` Pavel Begunkov [this message]
2025-10-27 15:10 ` David Wei
2025-10-28 14:55 ` David Wei
2025-10-28 15:22 ` Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=60f630cf-0057-4675-afcd-2b4e46430a44@gmail.com \
--to=asml.silence@gmail.com \
--cc=axboe@kernel.dk \
--cc=dw@davidwei.uk \
--cc=io-uring@vger.kernel.org \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).