From: Pavel Begunkov <asml.silence@gmail.com>
To: David Wei <dw@davidwei.uk>,
io-uring@vger.kernel.org, netdev@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Subject: Re: [PATCH v3 3/3] io_uring/zcrx: share an ifq between rings
Date: Mon, 27 Oct 2025 11:47:51 +0000 [thread overview]
Message-ID: <60f630cf-0057-4675-afcd-2b4e46430a44@gmail.com> (raw)
In-Reply-To: <309cb5ce-b19a-47b8-ba82-e75f69fe5bb3@gmail.com>
On 10/27/25 10:20, Pavel Begunkov wrote:
> On 10/26/25 17:34, David Wei wrote:
>> Add a way to share an ifq from a src ring that is real i.e. bound to a
>> HW RX queue with other rings. This is done by passing a new flag
>> IORING_ZCRX_IFQ_REG_SHARE in the registration struct
>> io_uring_zcrx_ifq_reg, alongside the fd of the src ring and the ifq id
>> to be shared.
>>
>> To prevent the src ring or ifq from being cleaned up or freed while
>> there are still shared ifqs, take the appropriate refs on the src ring
>> (ctx->refs) and src ifq (ifq->refs).
>>
>> Signed-off-by: David Wei <dw@davidwei.uk>
>> ---
>> include/uapi/linux/io_uring.h | 4 ++
>> io_uring/zcrx.c | 74 ++++++++++++++++++++++++++++++++++-
>> 2 files changed, 76 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>> index 04797a9b76bc..4da4552a4215 100644
>> --- a/include/uapi/linux/io_uring.h
>> +++ b/include/uapi/linux/io_uring.h
>> @@ -1063,6 +1063,10 @@ struct io_uring_zcrx_area_reg {
>> __u64 __resv2[2];
>> };
>> +enum io_uring_zcrx_ifq_reg_flags {
>> + IORING_ZCRX_IFQ_REG_SHARE = 1,
>> +};
>> +
>> /*
>> * Argument for IORING_REGISTER_ZCRX_IFQ
>> */
>> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
>> index 569cc0338acb..7418c959390a 100644
>> --- a/io_uring/zcrx.c
>> +++ b/io_uring/zcrx.c
>> @@ -22,10 +22,10 @@
>> #include <uapi/linux/io_uring.h>
>> #include "io_uring.h"
>> -#include "kbuf.h"
>> #include "memmap.h"
>> #include "zcrx.h"
>> #include "rsrc.h"
>> +#include "register.h"
>> #define IO_ZCRX_AREA_SUPPORTED_FLAGS (IORING_ZCRX_AREA_DMABUF)
>> @@ -541,6 +541,67 @@ struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx,
>> return ifq ? &ifq->region : NULL;
>> }
>> +static int io_share_zcrx_ifq(struct io_ring_ctx *ctx,
>> + struct io_uring_zcrx_ifq_reg __user *arg,
>> + struct io_uring_zcrx_ifq_reg *reg)
>> +{
>> + struct io_ring_ctx *src_ctx;
>> + struct io_zcrx_ifq *src_ifq;
>> + struct file *file;
>> + int src_fd, ret;
>> + u32 src_id, id;
>> +
>> + src_fd = reg->if_idx;
>> + src_id = reg->if_rxq;
>> +
>> + file = io_uring_register_get_file(src_fd, false);
>> + if (IS_ERR(file))
>> + return PTR_ERR(file);
>> +
>> + src_ctx = file->private_data;
>> + if (src_ctx == ctx)
>> + return -EBADFD;
>> +
>> + mutex_unlock(&ctx->uring_lock);
>> + io_lock_two_rings(ctx, src_ctx);
>> +
>> + ret = -EINVAL;
>> + src_ifq = xa_load(&src_ctx->zcrx_ctxs, src_id);
>> + if (!src_ifq)
>> + goto err_unlock;
>> +
>> + percpu_ref_get(&src_ctx->refs);
>> + refcount_inc(&src_ifq->refs);
>> +
>> + scoped_guard(mutex, &ctx->mmap_lock) {
>> + ret = xa_alloc(&ctx->zcrx_ctxs, &id, NULL, xa_limit_31b, GFP_KERNEL);
>> + if (ret)
>> + goto err_unlock;
>> +
>> + ret = -ENOMEM;
>> + if (xa_store(&ctx->zcrx_ctxs, id, src_ifq, GFP_KERNEL)) {
>> + xa_erase(&ctx->zcrx_ctxs, id);
>> + goto err_unlock;
>> + }
>
> It's just xa_alloc(..., src_ifq, ...);
>
>> + }
>> +
>> + reg->zcrx_id = id;
>> + if (copy_to_user(arg, reg, sizeof(*reg))) {
>> + ret = -EFAULT;
>> + goto err;
>> + }
>
> Better to do that before publishing zcrx into ctx->zcrx_ctxs
>
>> + mutex_unlock(&src_ctx->uring_lock);
>> + fput(file);
>> + return 0;
>> +err:
>> + scoped_guard(mutex, &ctx->mmap_lock)
>> + xa_erase(&ctx->zcrx_ctxs, id);
>> +err_unlock:
>> + mutex_unlock(&src_ctx->uring_lock);
>> + fput(file);
>> + return ret;
>> +}
>> +
>> int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
>> struct io_uring_zcrx_ifq_reg __user *arg)
>> {
>> @@ -566,6 +627,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
>> return -EINVAL;
>> if (copy_from_user(&reg, arg, sizeof(reg)))
>> return -EFAULT;
>> + if (reg.flags & IORING_ZCRX_IFQ_REG_SHARE)
>> + return io_share_zcrx_ifq(ctx, arg, &reg);
>> if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd)))
>> return -EFAULT;
>> if (!mem_is_zero(&reg.__resv, sizeof(reg.__resv)) ||
>> @@ -663,7 +726,7 @@ void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
>> if (ifq)
>> xa_erase(&ctx->zcrx_ctxs, id);
>> }
>> - if (!ifq)
>> + if (!ifq || ctx != ifq->ctx)
>> break;
>> io_zcrx_ifq_free(ifq);
>> }
>> @@ -734,6 +797,13 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
>> if (xa_get_mark(&ctx->zcrx_ctxs, index, XA_MARK_0))
>> continue;
>> + /*
>> + * Only shared ifqs want to put ctx->refs on the owning ifq
>> + * ring. This matches the get in io_share_zcrx_ifq().
>> + */
>> + if (ctx != ifq->ctx)
>> + percpu_ref_put(&ifq->ctx->refs);
>
> After you put this and ifq->refs below down, the zcrx object can get
> destroyed, but this ctx might still have requests using the object.
> Waiting on ctx refs would ensure requests are killed, but that'd
> create a cycle.
Another concerning part is long-term cross-ctx referencing,
which is even worse than the pp locking it up. I mentioned
that it'd be great to reverse the refcounting relation,
but that'd also need additional groundwork to break
dependencies.
>
>> +
>> /* Safe to clean up from any ring. */
>> if (refcount_dec_and_test(&ifq->refs)) {
>> io_zcrx_scrub(ifq);
>
--
Pavel Begunkov
next prev parent reply other threads:[~2025-10-27 11:47 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-26 17:34 [PATCH v3 0/3] io_uring zcrx ifq sharing David Wei
2025-10-26 17:34 ` [PATCH v3 1/3] io_uring/rsrc: rename and export io_lock_two_rings() David Wei
2025-10-27 10:04 ` Pavel Begunkov
2025-10-28 14:54 ` David Wei
2025-10-28 15:19 ` Pavel Begunkov
2025-10-26 17:34 ` [PATCH v3 2/3] io_uring/zcrx: add refcount to struct io_zcrx_ifq David Wei
2025-10-26 17:34 ` [PATCH v3 3/3] io_uring/zcrx: share an ifq between rings David Wei
2025-10-27 10:20 ` Pavel Begunkov
2025-10-27 11:47 ` Pavel Begunkov [this message]
2025-10-27 15:10 ` David Wei
2025-10-28 14:55 ` David Wei
2025-10-28 15:22 ` Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=60f630cf-0057-4675-afcd-2b4e46430a44@gmail.com \
--to=asml.silence@gmail.com \
--cc=axboe@kernel.dk \
--cc=dw@davidwei.uk \
--cc=io-uring@vger.kernel.org \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).