From: Ming Lei <ming.lei@redhat.com>
To: Pavel Begunkov <asml.silence@gmail.com>
Cc: io-uring@vger.kernel.org, ming.lei@redhat.com
Subject: Re: [RFC 2/3] io_uring/bpf: allow to register and run BPF programs
Date: Wed, 13 Nov 2024 16:21:48 +0800 [thread overview]
Message-ID: <ZzRhnDXxkahNB0rx@fedora> (raw)
In-Reply-To: <cffec449e9f6a37b0701f2a8fdd37688db25be55.1731285516.git.asml.silence@gmail.com>
On Mon, Nov 11, 2024 at 01:50:45AM +0000, Pavel Begunkov wrote:
> Let the user register a BPF_PROG_TYPE_IOURING BPF program to a ring.
> The program will be run in the waiting loop every time something
> happens, i.e. the task was woken up by a task_work / signal / etc.
>
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> ---
> include/linux/io_uring_types.h | 4 +++
> include/uapi/linux/io_uring.h | 9 +++++
> io_uring/bpf.c | 63 ++++++++++++++++++++++++++++++++++
> io_uring/bpf.h | 41 ++++++++++++++++++++++
> io_uring/io_uring.c | 15 ++++++++
> io_uring/register.c | 7 ++++
> 6 files changed, 139 insertions(+)
> create mode 100644 io_uring/bpf.h
>
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index ad5001102c86..50cee0d3622e 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -8,6 +8,8 @@
> #include <linux/llist.h>
> #include <uapi/linux/io_uring.h>
>
> +struct io_bpf_ctx;
> +
> enum {
> /*
> * A hint to not wake right away but delay until there are enough of
> @@ -246,6 +248,8 @@ struct io_ring_ctx {
>
> enum task_work_notify_mode notify_method;
> unsigned sq_thread_idle;
> +
> + struct io_bpf_ctx *bpf_ctx;
> } ____cacheline_aligned_in_smp;
>
> /* submission data */
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index ba373deb8406..f2c2fefc8514 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -634,6 +634,8 @@ enum io_uring_register_op {
> /* register fixed io_uring_reg_wait arguments */
> IORING_REGISTER_CQWAIT_REG = 34,
>
> + IORING_REGISTER_BPF = 35,
> +
> /* this goes last */
> IORING_REGISTER_LAST,
>
> @@ -905,6 +907,13 @@ enum io_uring_socket_op {
> SOCKET_URING_OP_SETSOCKOPT,
> };
>
> +struct io_uring_bpf_reg {
> + __u64 prog_fd;
> + __u32 flags;
> + __u32 resv1;
> + __u64 resv2[2];
> +};
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/io_uring/bpf.c b/io_uring/bpf.c
> index 6eb0c47b4aa9..8b7c74761c63 100644
> --- a/io_uring/bpf.c
> +++ b/io_uring/bpf.c
> @@ -1,6 +1,9 @@
> // SPDX-License-Identifier: GPL-2.0
>
> #include <linux/bpf.h>
> +#include <linux/filter.h>
> +
> +#include "bpf.h"
>
> static const struct bpf_func_proto *
> io_bpf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> @@ -22,3 +25,63 @@ const struct bpf_verifier_ops bpf_io_uring_verifier_ops = {
> .get_func_proto = io_bpf_func_proto,
> .is_valid_access = io_bpf_is_valid_access,
> };
> +
> +int io_run_bpf(struct io_ring_ctx *ctx)
> +{
> + struct io_bpf_ctx *bc = ctx->bpf_ctx;
> + int ret;
> +
> + mutex_lock(&ctx->uring_lock);
> + ret = bpf_prog_run_pin_on_cpu(bc->prog, bc);
> + mutex_unlock(&ctx->uring_lock);
> + return ret;
> +}
> +
> +int io_unregister_bpf(struct io_ring_ctx *ctx)
> +{
> + struct io_bpf_ctx *bc = ctx->bpf_ctx;
> +
> + if (!bc)
> + return -ENXIO;
> + bpf_prog_put(bc->prog);
> + kfree(bc);
> + ctx->bpf_ctx = NULL;
> + return 0;
> +}
> +
> +int io_register_bpf(struct io_ring_ctx *ctx, void __user *arg,
> + unsigned int nr_args)
> +{
> + struct __user io_uring_bpf_reg *bpf_reg_usr = arg;
> + struct io_uring_bpf_reg bpf_reg;
> + struct io_bpf_ctx *bc;
> + struct bpf_prog *prog;
> +
> + if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
> + return -EOPNOTSUPP;
> +
> + if (nr_args != 1)
> + return -EINVAL;
> + if (copy_from_user(&bpf_reg, bpf_reg_usr, sizeof(bpf_reg)))
> + return -EFAULT;
> + if (bpf_reg.flags || bpf_reg.resv1 ||
> + bpf_reg.resv2[0] || bpf_reg.resv2[1])
> + return -EINVAL;
> +
> + if (ctx->bpf_ctx)
> + return -ENXIO;
> +
> + bc = kzalloc(sizeof(*bc), GFP_KERNEL);
> + if (!bc)
> + return -ENOMEM;
> +
> + prog = bpf_prog_get_type(bpf_reg.prog_fd, BPF_PROG_TYPE_IOURING);
> + if (IS_ERR(prog)) {
> + kfree(bc);
> + return PTR_ERR(prog);
> + }
> +
> + bc->prog = prog;
> + ctx->bpf_ctx = bc;
> + return 0;
> +}
> diff --git a/io_uring/bpf.h b/io_uring/bpf.h
> new file mode 100644
> index 000000000000..2b4e555ff07a
> --- /dev/null
> +++ b/io_uring/bpf.h
> @@ -0,0 +1,41 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#ifndef IOU_BPF_H
> +#define IOU_BPF_H
> +
> +#include <linux/io_uring/bpf.h>
> +#include <linux/io_uring_types.h>
> +
> +struct bpf_prog;
> +
> +struct io_bpf_ctx {
> + struct io_bpf_ctx_kern kern;
> + struct bpf_prog *prog;
> +};
> +
> +static inline bool io_bpf_enabled(struct io_ring_ctx *ctx)
> +{
> + return IS_ENABLED(CONFIG_BPF) && ctx->bpf_ctx != NULL;
> +}
> +
> +#ifdef CONFIG_BPF
> +int io_register_bpf(struct io_ring_ctx *ctx, void __user *arg,
> + unsigned int nr_args);
> +int io_unregister_bpf(struct io_ring_ctx *ctx);
> +int io_run_bpf(struct io_ring_ctx *ctx);
> +
> +#else
> +static inline int io_register_bpf(struct io_ring_ctx *ctx, void __user *arg,
> + unsigned int nr_args)
> +{
> + return -EOPNOTSUPP;
> +}
> +static inline int io_unregister_bpf(struct io_ring_ctx *ctx)
> +{
> + return -EOPNOTSUPP;
> +}
> +static inline int io_run_bpf(struct io_ring_ctx *ctx)
> +{
> +}
> +#endif
> +
> +#endif
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index f34fa1ead2cf..82599e2a888a 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -104,6 +104,7 @@
> #include "rw.h"
> #include "alloc_cache.h"
> #include "eventfd.h"
> +#include "bpf.h"
>
> #define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \
> IOSQE_IO_HARDLINK | IOSQE_ASYNC)
> @@ -2834,6 +2835,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
>
> io_napi_busy_loop(ctx, &iowq);
>
> + if (io_bpf_enabled(ctx)) {
> + ret = io_run_bpf(ctx);
> + if (ret == IOU_BPF_RET_STOP)
> + return 0;
> + }
> +
> trace_io_uring_cqring_wait(ctx, min_events);
> do {
> unsigned long check_cq;
> @@ -2879,6 +2886,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
> if (ret < 0)
> break;
>
> + if (io_bpf_enabled(ctx)) {
> + ret = io_run_bpf(ctx);
> + if (ret == IOU_BPF_RET_STOP)
> + break;
> + continue;
> + }
I believe 'struct_ops' is a much simpler way to run the prog and return the result.
Then you wouldn't need any bpf core changes or the bpf registration code.
Thanks,
Ming
next prev parent reply other threads:[~2024-11-13 8:22 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-11 1:50 [RFC 0/3] Add BPF for io_uring Pavel Begunkov
2024-11-11 1:50 ` [RFC 1/3] bpf/io_uring: add io_uring program type Pavel Begunkov
2024-11-11 1:50 ` [RFC 2/3] io_uring/bpf: allow to register and run BPF programs Pavel Begunkov
2024-11-13 8:21 ` Ming Lei [this message]
2024-11-13 13:09 ` Pavel Begunkov
2024-11-11 1:50 ` [RFC 3/3] io_uring/bpf: add kfuncs for " Pavel Begunkov
2024-11-13 8:13 ` [RFC 0/3] Add BPF for io_uring Ming Lei
2024-11-13 13:09 ` Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZzRhnDXxkahNB0rx@fedora \
--to=ming.lei@redhat.com \
--cc=asml.silence@gmail.com \
--cc=io-uring@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.