From: "Toke Høiland-Jørgensen" <toke@redhat.com>
To: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
Yonghong Song <yhs@fb.com>, Andrii Nakryiko <andriin@fb.com>,
John Fastabend <john.fastabend@gmail.com>,
Jiri Olsa <jolsa@redhat.com>,
Eelco Chaudron <echaudro@redhat.com>,
KP Singh <kpsingh@chromium.org>,
Networking <netdev@vger.kernel.org>, bpf <bpf@vger.kernel.org>
Subject: Re: [PATCH RESEND bpf-next v3 3/9] bpf: wrap prog->aux->linked_prog in a bpf_tracing_link
Date: Fri, 11 Sep 2020 22:56:23 +0200 [thread overview]
Message-ID: <875z8k9gnc.fsf@toke.dk> (raw)
In-Reply-To: <CAEf4Bzb=va0n3GMaYx-Kk7yCpsUK2iDMjVh2O2bm=9q-troH9A@mail.gmail.com>
Andrii Nakryiko <andrii.nakryiko@gmail.com> writes:
> On Fri, Sep 11, 2020 at 3:00 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>>
>> From: Toke Høiland-Jørgensen <toke@redhat.com>
>>
>> The bpf_tracing_link structure is a convenient data structure to contain
>> the reference to a linked program; in preparation for supporting multiple
>> attachments for the same freplace program, move the linked_prog in
>> prog->aux into a bpf_tracing_link wrapper.
>>
>> With this change, it is no longer possible to attach the same tracing
>> program multiple times (detaching in-between), since the reference from the
>> tracing program to the target disappears on the first attach. However,
>> since the next patch will let the caller supply an attach target, that will
>> also make it possible to attach to the same place multiple times.
>>
>> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
>> ---
>> include/linux/bpf.h | 21 +++++++++---
>> kernel/bpf/btf.c | 13 +++++---
>> kernel/bpf/core.c | 5 +--
>> kernel/bpf/syscall.c | 81 +++++++++++++++++++++++++++++++++++++----------
>> kernel/bpf/trampoline.c | 12 ++-----
>> kernel/bpf/verifier.c | 13 +++++---
>> 6 files changed, 102 insertions(+), 43 deletions(-)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 7f19c3216370..722c60f1c1fc 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -26,6 +26,7 @@ struct bpf_verifier_log;
>> struct perf_event;
>> struct bpf_prog;
>> struct bpf_prog_aux;
>> +struct bpf_tracing_link;
>> struct bpf_map;
>> struct sock;
>> struct seq_file;
>> @@ -614,8 +615,8 @@ static __always_inline unsigned int bpf_dispatcher_nop_func(
>> }
>> #ifdef CONFIG_BPF_JIT
>> struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
>> -int bpf_trampoline_link_prog(struct bpf_prog *prog);
>> -int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
>> +int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr);
>> +int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr);
>> int bpf_trampoline_get(u64 key, void *addr,
>> struct btf_func_model *fmodel,
>> struct bpf_trampoline **trampoline);
>> @@ -667,11 +668,13 @@ static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
>> {
>> return NULL;
>> }
>> -static inline int bpf_trampoline_link_prog(struct bpf_prog *prog)
>> +static inline int bpf_trampoline_link_prog(struct bpf_prog *prog,
>> + struct bpf_trampoline *tr)
>> {
>> return -ENOTSUPP;
>> }
>> -static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
>> +static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog,
>> + struct bpf_trampoline *tr)
>> {
>> return -ENOTSUPP;
>> }
>> @@ -740,14 +743,13 @@ struct bpf_prog_aux {
>> u32 max_rdonly_access;
>> u32 max_rdwr_access;
>> const struct bpf_ctx_arg_aux *ctx_arg_info;
>> - struct bpf_prog *linked_prog;
>> + struct bpf_tracing_link *tgt_link;
>> bool verifier_zext; /* Zero extensions has been inserted by verifier. */
>> bool offload_requested;
>> bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
>> bool func_proto_unreliable;
>> bool sleepable;
>> enum bpf_tramp_prog_type trampoline_prog_type;
>> - struct bpf_trampoline *trampoline;
>> struct hlist_node tramp_hlist;
>> /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
>> const struct btf_type *attach_func_proto;
>> @@ -827,6 +829,13 @@ struct bpf_link {
>> struct work_struct work;
>> };
>>
>> +struct bpf_tracing_link {
>> + struct bpf_link link;
>> + enum bpf_attach_type attach_type;
>> + struct bpf_trampoline *trampoline;
>> + struct bpf_prog *tgt_prog;
>> +};
>> +
>> struct bpf_link_ops {
>> void (*release)(struct bpf_link *link);
>> void (*dealloc)(struct bpf_link *link);
>> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
>> index 2ace56c99c36..e10f13f8251c 100644
>> --- a/kernel/bpf/btf.c
>> +++ b/kernel/bpf/btf.c
>> @@ -3706,10 +3706,10 @@ struct btf *btf_parse_vmlinux(void)
>>
>> struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
>> {
>> - struct bpf_prog *tgt_prog = prog->aux->linked_prog;
>> + struct bpf_tracing_link *tgt_link = prog->aux->tgt_link;
>>
>> - if (tgt_prog) {
>> - return tgt_prog->aux->btf;
>> + if (tgt_link && tgt_link->tgt_prog) {
>> + return tgt_link->tgt_prog->aux->btf;
>> } else {
>> return btf_vmlinux;
>> }
>> @@ -3733,14 +3733,17 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
>> struct bpf_insn_access_aux *info)
>> {
>> const struct btf_type *t = prog->aux->attach_func_proto;
>> - struct bpf_prog *tgt_prog = prog->aux->linked_prog;
>> struct btf *btf = bpf_prog_get_target_btf(prog);
>> const char *tname = prog->aux->attach_func_name;
>> struct bpf_verifier_log *log = info->log;
>> + struct bpf_prog *tgt_prog = NULL;
>> const struct btf_param *args;
>> u32 nr_args, arg;
>> int i, ret;
>>
>> + if (prog->aux->tgt_link)
>> + tgt_prog = prog->aux->tgt_link->tgt_prog;
>> +
>> if (off % 8) {
>> bpf_log(log, "func '%s' offset %d is not multiple of 8\n",
>> tname, off);
>> @@ -4572,7 +4575,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
>> return -EFAULT;
>> }
>> if (prog_type == BPF_PROG_TYPE_EXT)
>> - prog_type = prog->aux->linked_prog->type;
>> + prog_type = prog->aux->tgt_link->tgt_prog->type;
>>
>> t = btf_type_by_id(btf, t->type);
>> if (!t || !btf_type_is_func_proto(t)) {
>> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
>> index ed0b3578867c..54c125cec218 100644
>> --- a/kernel/bpf/core.c
>> +++ b/kernel/bpf/core.c
>> @@ -2130,7 +2130,6 @@ static void bpf_prog_free_deferred(struct work_struct *work)
>> if (aux->prog->has_callchain_buf)
>> put_callchain_buffers();
>> #endif
>> - bpf_trampoline_put(aux->trampoline);
>> for (i = 0; i < aux->func_cnt; i++)
>> bpf_jit_free(aux->func[i]);
>> if (aux->func_cnt) {
>> @@ -2146,8 +2145,8 @@ void bpf_prog_free(struct bpf_prog *fp)
>> {
>> struct bpf_prog_aux *aux = fp->aux;
>>
>> - if (aux->linked_prog)
>> - bpf_prog_put(aux->linked_prog);
>> + if (aux->tgt_link)
>> + bpf_link_put(&aux->tgt_link->link);
>
> Until the link is primed, you shouldn't bpf_link_put() it. At this
> stage the link itself is just a piece of memory that needs to be
> kfree()'d. And your circular dependency problem doesn't exist anymore.
> You'll have to put a trampoline and target prog manually here, though
> (but you have a similar problem below as well, so might just have a
> small helper to do this). But I think it's simpler that relying on an
> artificial "defunct" state of not-yet-activated bpf_link, which you do
> with the dance around link->prog = NULL.
Yeah, makes sense. I initially figured that would be 'breaking the
abstraction' of bpf_link, but I ended up having to do that anyway, so
you're right I might as well treat it as a piece of memory here.
[...]
>> @@ -2574,14 +2614,16 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
>> goto out_put_prog;
>> }
>>
>> - link = kzalloc(sizeof(*link), GFP_USER);
>> + link = READ_ONCE(prog->aux->tgt_link);
>> if (!link) {
>> - err = -ENOMEM;
>> + err = -ENOENT;
>> + goto out_put_prog;
>> + }
>> + olink = cmpxchg(&prog->aux->tgt_link, link, NULL);
>> + if (olink != link) {
>> + err = -ENOENT;
>> goto out_put_prog;
>> }
>
> Wouldn't single xchg to NULL be sufficient to achieve the same?
> READ_ONCE + cmpxchg seems unnecessary to me.
It would, but in the next patch I'm introducing a check on the contents
of the link before cmpxchg'ing it, so figured it was easier to just use
the same pattern here.
>> - bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING,
>> - &bpf_tracing_link_lops, prog);
>> - link->attach_type = prog->expected_attach_type;
>>
>> err = bpf_link_prime(&link->link, &link_primer);
>> if (err) {
>
> if priming errors out, you need to put target prog and trampoline,
> kfree(link) won't do it (and calling bpf_link_cleanup() is not correct
> before priming). See above as well.
Ah yes, good catch!
> BTW, one interesting side effect of all this is that if your initial
> attach failed, you won't be able to try again, because
> prog->aux->tgt_link is gone. If that's the problem, we'll need to
> introduce locking and copy that link, try to attach, then clear out
> prog->aug->tgt_link only if we succeeded. Just bringing this up, as it
> might not be obvious (or I might be wrong :).
Yeah, did think about that. From a purist PoV you're right that a
"destructive attempt" is not ideal; but we already agreed that clearing
out the link on attach was an acceptable change in behaviour. And I
figured that a failure in link_prim() or trampoline_link_prog() would be
quite rare, so not something we'd want to expend a lot of effort
ensuring was really atomic...
-Toke
next prev parent reply other threads:[~2020-09-11 20:56 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-11 9:59 [PATCH RESEND bpf-next v3 0/9] bpf: Support multi-attach for freplace programs Toke Høiland-Jørgensen
2020-09-11 9:59 ` [PATCH RESEND bpf-next v3 1/9] bpf: change logging calls from verbose() to bpf_log() and use log pointer Toke Høiland-Jørgensen
2020-09-11 9:59 ` [PATCH RESEND bpf-next v3 2/9] bpf: verifier: refactor check_attach_btf_id() Toke Høiland-Jørgensen
2020-09-11 19:13 ` Andrii Nakryiko
2020-09-11 19:50 ` Toke Høiland-Jørgensen
2020-09-11 9:59 ` [PATCH RESEND bpf-next v3 3/9] bpf: wrap prog->aux->linked_prog in a bpf_tracing_link Toke Høiland-Jørgensen
2020-09-11 19:57 ` Andrii Nakryiko
2020-09-11 20:56 ` Toke Høiland-Jørgensen [this message]
2020-09-11 9:59 ` [PATCH RESEND bpf-next v3 4/9] bpf: support attaching freplace programs to multiple attach points Toke Høiland-Jørgensen
2020-09-11 21:10 ` Andrii Nakryiko
2020-09-14 16:08 ` Toke Høiland-Jørgensen
2020-09-14 23:10 ` Andrii Nakryiko
2020-09-15 11:35 ` Toke Høiland-Jørgensen
2020-09-11 9:59 ` [PATCH RESEND bpf-next v3 5/9] bpf: Fix context type resolving for extension programs Toke Høiland-Jørgensen
2020-09-11 9:59 ` [PATCH RESEND bpf-next v3 6/9] tools: add new members to bpf_attr.raw_tracepoint in bpf.h Toke Høiland-Jørgensen
2020-09-11 21:16 ` Andrii Nakryiko
2020-09-11 9:59 ` [PATCH RESEND bpf-next v3 7/9] libbpf: add support for supplying target to bpf_raw_tracepoint_open() Toke Høiland-Jørgensen
2020-09-11 21:21 ` Andrii Nakryiko
2020-09-11 9:59 ` [PATCH RESEND bpf-next v3 8/9] selftests: add test for multiple attachments of freplace program Toke Høiland-Jørgensen
2020-09-11 9:59 ` [PATCH RESEND bpf-next v3 9/9] selftests/bpf: Adding test for arg dereference in extension trace Toke Høiland-Jørgensen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=875z8k9gnc.fsf@toke.dk \
--to=toke@redhat.com \
--cc=andrii.nakryiko@gmail.com \
--cc=andriin@fb.com \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=echaudro@redhat.com \
--cc=john.fastabend@gmail.com \
--cc=jolsa@redhat.com \
--cc=kafai@fb.com \
--cc=kpsingh@chromium.org \
--cc=netdev@vger.kernel.org \
--cc=songliubraving@fb.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.