From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org, netdev@vger.kernel.org,
bpf@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>, Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Andrii Nakryiko <andrii@kernel.org>,
Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
Yonghong Song <yhs@fb.com>,
John Fastabend <john.fastabend@gmail.com>,
KP Singh <kpsingh@kernel.org>,
Horst Schirmeier <horst.schirmeier@tu-dortmund.de>,
"Franz-B . Tuneke" <franz-bernhard.tuneke@tu-dortmund.de>,
Christian Dietrich <stettberger@dokucode.de>
Subject: Re: [PATCH 14/23] io_uring: add support for bpf requests
Date: Fri, 21 May 2021 01:42:57 +0100 [thread overview]
Message-ID: <70ae2078-689f-79d3-e067-2bb720dc9fa5@gmail.com> (raw)
In-Reply-To: <cc2b848d112d86bd1f4ea3f2813d0a016e44a364.1621424513.git.asml.silence@gmail.com>
On 5/19/21 3:13 PM, Pavel Begunkov wrote:
> Wire up a new io_uring operation type IORING_OP_BPF, which executes a
> specified BPF program from the registered prog table. It doesn't allow
> to do anything useful for now, no BPF functions are allowed apart from
> basic ones.
>
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> ---
> fs/io_uring.c | 92 +++++++++++++++++++++++++++++++++++
> include/uapi/linux/io_uring.h | 1 +
> 2 files changed, 93 insertions(+)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index b13cbcd5c47b..20fddc5945f2 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -682,6 +682,11 @@ struct io_unlink {
> struct filename *filename;
> };
>
> +struct io_bpf {
> + struct file *file;
> + struct bpf_prog *prog;
> +};
> +
> struct io_completion {
> struct file *file;
> struct list_head list;
> @@ -826,6 +831,7 @@ struct io_kiocb {
> struct io_shutdown shutdown;
> struct io_rename rename;
> struct io_unlink unlink;
> + struct io_bpf bpf;
> /* use only after cleaning per-op data, see io_clean_op() */
> struct io_completion compl;
> };
> @@ -875,6 +881,9 @@ struct io_defer_entry {
> u32 seq;
> };
>
> +struct io_bpf_ctx {
> +};
> +
> struct io_op_def {
> /* needs req->file assigned */
> unsigned needs_file : 1;
> @@ -1039,6 +1048,7 @@ static const struct io_op_def io_op_defs[] = {
> },
> [IORING_OP_RENAMEAT] = {},
> [IORING_OP_UNLINKAT] = {},
> + [IORING_OP_BPF] = {},
> };
>
> static bool io_disarm_next(struct io_kiocb *req);
> @@ -1070,6 +1080,7 @@ static void io_rsrc_put_work(struct work_struct *work);
> static void io_req_task_queue(struct io_kiocb *req);
> static void io_submit_flush_completions(struct io_comp_state *cs,
> struct io_ring_ctx *ctx);
> +static void io_bpf_run(struct io_kiocb *req, unsigned int issue_flags);
> static bool io_poll_remove_waitqs(struct io_kiocb *req);
> static int io_req_prep_async(struct io_kiocb *req);
>
> @@ -3931,6 +3942,53 @@ static int io_openat(struct io_kiocb *req, unsigned int issue_flags)
> return io_openat2(req, issue_flags);
> }
>
> +static int io_bpf_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> + struct io_ring_ctx *ctx = req->ctx;
> + struct bpf_prog *prog;
> + unsigned int idx;
> +
> + if (unlikely(ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
> + return -EINVAL;
> + if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
> + return -EINVAL;
> + if (sqe->ioprio || sqe->len || sqe->cancel_flags)
> + return -EINVAL;
> + if (sqe->addr)
> + return -EINVAL;
> +
> + idx = READ_ONCE(sqe->off);
> + if (unlikely(idx >= ctx->nr_bpf_progs))
> + return -EFAULT;
> + idx = array_index_nospec(idx, ctx->nr_bpf_progs);
> + prog = ctx->bpf_progs[idx].prog;
> + if (!prog)
> + return -EFAULT;
> +
> + req->bpf.prog = prog;
> + return 0;
> +}
> +
> +static void io_bpf_run_task_work(struct callback_head *cb)
> +{
> + struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
> + struct io_ring_ctx *ctx = req->ctx;
> +
> + mutex_lock(&ctx->uring_lock);
> + io_bpf_run(req, 0);
> + mutex_unlock(&ctx->uring_lock);
> +}
> +
> +static int io_bpf(struct io_kiocb *req, unsigned int issue_flags)
> +{
> + init_task_work(&req->task_work, io_bpf_run_task_work);
> + if (unlikely(io_req_task_work_add(req))) {
> + req_ref_get(req);
> + io_req_task_queue_fail(req, -ECANCELED);
> + }
> + return 0;
> +}
> +
> static int io_remove_buffers_prep(struct io_kiocb *req,
> const struct io_uring_sqe *sqe)
> {
> @@ -6002,6 +6060,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> return io_renameat_prep(req, sqe);
> case IORING_OP_UNLINKAT:
> return io_unlinkat_prep(req, sqe);
> + case IORING_OP_BPF:
> + return io_bpf_prep(req, sqe);
> }
>
> printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
> @@ -6269,6 +6329,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
> case IORING_OP_UNLINKAT:
> ret = io_unlinkat(req, issue_flags);
> break;
> + case IORING_OP_BPF:
> + ret = io_bpf(req, issue_flags);
> + break;
> default:
> ret = -EINVAL;
> break;
> @@ -10303,6 +10366,35 @@ const struct bpf_verifier_ops bpf_io_uring_verifier_ops = {
> .is_valid_access = io_bpf_is_valid_access,
> };
>
> +static void io_bpf_run(struct io_kiocb *req, unsigned int issue_flags)
> +{
> + struct io_ring_ctx *ctx = req->ctx;
> + struct io_bpf_ctx bpf_ctx;
> + struct bpf_prog *prog;
> + int ret = -EAGAIN;
> +
> + lockdep_assert_held(&req->ctx->uring_lock);
> +
> + if (unlikely(percpu_ref_is_dying(&ctx->refs) ||
> + atomic_read(&req->task->io_uring->in_idle)))
> + goto done;
> +
> + memset(&bpf_ctx, 0, sizeof(bpf_ctx));
> + prog = req->bpf.prog;
> +
> + if (prog->aux->sleepable) {
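Looks like I forgot to amend this: the condition should be inverted, i.e.
`if (!prog->aux->sleepable)`, so that only non-sleepable programs run under
rcu_read_lock() and sleepable ones run outside of the RCU read-side section.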
> + rcu_read_lock();
> + bpf_prog_run_pin_on_cpu(req->bpf.prog, &bpf_ctx);
> + rcu_read_unlock();
> + } else {
> + bpf_prog_run_pin_on_cpu(req->bpf.prog, &bpf_ctx);
> + }
> +
> + ret = 0;
> +done:
> + __io_req_complete(req, issue_flags, ret, 0);
> +}
> +
> SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
> void __user *, arg, unsigned int, nr_args)
> {
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index b450f41d7389..25ab804670e1 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -138,6 +138,7 @@ enum {
> IORING_OP_SHUTDOWN,
> IORING_OP_RENAMEAT,
> IORING_OP_UNLINKAT,
> + IORING_OP_BPF,
>
> /* this goes last, obviously */
> IORING_OP_LAST,
>
--
Pavel Begunkov
next prev parent reply other threads:[~2021-05-21 0:43 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-05-19 14:13 [RFC v2 00/23] io_uring BPF requests Pavel Begunkov
2021-05-19 14:13 ` [PATCH 01/23] io_uring: shuffle rarely used ctx fields Pavel Begunkov
2021-05-20 21:46 ` Song Liu
2021-05-20 22:46 ` Pavel Begunkov
2021-05-19 14:13 ` [PATCH 02/23] io_uring: localise fixed resources fields Pavel Begunkov
2021-05-19 14:13 ` [PATCH 03/23] io_uring: remove dependency on ring->sq/cq_entries Pavel Begunkov
2021-05-19 14:13 ` [PATCH 04/23] io_uring: deduce cq_mask from cq_entries Pavel Begunkov
2021-05-19 14:13 ` [PATCH 05/23] io_uring: kill cached_cq_overflow Pavel Begunkov
2021-05-19 14:13 ` [PATCH 06/23] io_uring: rename io_get_cqring Pavel Begunkov
2021-05-19 14:13 ` [PATCH 07/23] io_uring: extract struct for CQ Pavel Begunkov
2021-05-19 14:13 ` [PATCH 08/23] io_uring: internally pass CQ indexes Pavel Begunkov
2021-05-19 14:13 ` [PATCH 09/23] io_uring: extract cq size helper Pavel Begunkov
2021-05-19 14:13 ` [PATCH 10/23] io_uring: add support for multiple CQs Pavel Begunkov
2021-05-19 14:13 ` [PATCH 11/23] io_uring: enable mmap'ing additional CQs Pavel Begunkov
2021-05-19 14:13 ` [PATCH 12/23] bpf: add IOURING program type Pavel Begunkov
2021-05-20 23:34 ` Song Liu
2021-05-21 0:56 ` Pavel Begunkov
2021-05-19 14:13 ` [PATCH 13/23] io_uring: implement bpf prog registration Pavel Begunkov
2021-05-20 23:45 ` Song Liu
2021-05-21 0:43 ` Pavel Begunkov
2021-05-19 14:13 ` [PATCH 14/23] io_uring: add support for bpf requests Pavel Begunkov
2021-05-21 0:42 ` Pavel Begunkov [this message]
2021-05-19 14:13 ` [PATCH 15/23] io_uring: enable BPF to submit SQEs Pavel Begunkov
2021-05-21 0:06 ` Song Liu
2021-05-21 1:07 ` Alexei Starovoitov
2021-05-21 9:33 ` Pavel Begunkov
2021-05-19 14:13 ` [PATCH 16/23] io_uring: enable bpf to submit CQEs Pavel Begunkov
2021-05-19 14:13 ` [PATCH 17/23] io_uring: enable bpf to reap CQEs Pavel Begunkov
2021-05-19 14:13 ` [PATCH 18/23] libbpf: support io_uring Pavel Begunkov
2021-05-19 17:38 ` Andrii Nakryiko
2021-05-20 9:58 ` Pavel Begunkov
2021-05-20 17:23 ` Andrii Nakryiko
2021-05-19 14:13 ` [PATCH 19/23] io_uring: pass user_data to bpf executor Pavel Begunkov
2021-05-19 14:13 ` [PATCH 20/23] bpf: Add bpf_copy_to_user() helper Pavel Begunkov
2021-05-19 14:13 ` [PATCH 21/23] io_uring: wire bpf copy to user Pavel Begunkov
2021-05-19 14:13 ` [PATCH 22/23] io_uring: don't wait on CQ exclusively Pavel Begunkov
2021-05-19 14:13 ` [PATCH 23/23] io_uring: enable bpf reqs to wait for CQs Pavel Begunkov
2021-05-21 0:35 ` [RFC v2 00/23] io_uring BPF requests Song Liu
2021-05-21 0:58 ` Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=70ae2078-689f-79d3-e067-2bb720dc9fa5@gmail.com \
--to=asml.silence@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=axboe@kernel.dk \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=franz-bernhard.tuneke@tu-dortmund.de \
--cc=horst.schirmeier@tu-dortmund.de \
--cc=io-uring@vger.kernel.org \
--cc=john.fastabend@gmail.com \
--cc=kafai@fb.com \
--cc=kpsingh@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=songliubraving@fb.com \
--cc=stettberger@dokucode.de \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.