From: Martin KaFai Lau <martin.lau@linux.dev>
To: Amery Hung <amery.hung@bytedance.com>
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org,
daniel@iogearbox.net, andrii@kernel.org,
alexei.starovoitov@gmail.com, martin.lau@kernel.org,
sinquersw@gmail.com, toke@redhat.com, jhs@mojatatu.com,
jiri@resnulli.us, stfomichev@gmail.com,
ekarani.silvestre@ccc.ufcg.edu.br, yangpeihao@sjtu.edu.cn,
xiyou.wangcong@gmail.com, yepeilin.cs@gmail.com,
ameryhung@gmail.com
Subject: Re: [PATCH bpf-next v1 06/13] bpf: net_sched: Add basic bpf qdisc kfuncs
Date: Wed, 18 Dec 2024 23:37:31 -0800 [thread overview]
Message-ID: <fd856afb-7ff5-4928-8ba1-22e68c0913e7@linux.dev> (raw)
In-Reply-To: <20241213232958.2388301-7-amery.hung@bytedance.com>
On 12/13/24 3:29 PM, Amery Hung wrote:
> Add basic kfuncs for working on skb in qdisc.
>
> Both bpf_qdisc_skb_drop() and bpf_kfree_skb() can be used to release
> a reference to an skb. However, bpf_qdisc_skb_drop() can only be called
> in .enqueue where a to_free skb list is available from kernel to defer
> the release. bpf_kfree_skb() should be used elsewhere. It is also used
> in bpf_obj_free_fields() when cleaning up skb in maps and collections.
>
> bpf_skb_get_hash() returns the flow hash of an skb, which can be used
> to build flow-based queueing algorithms.
>
> Finally, allow users to create read-only dynptr via bpf_dynptr_from_skb().
>
> Signed-off-by: Amery Hung <amery.hung@bytedance.com>
> ---
> net/sched/bpf_qdisc.c | 77 ++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 76 insertions(+), 1 deletion(-)
>
> diff --git a/net/sched/bpf_qdisc.c b/net/sched/bpf_qdisc.c
> index a2e2db29e5fc..28959424eab0 100644
> --- a/net/sched/bpf_qdisc.c
> +++ b/net/sched/bpf_qdisc.c
> @@ -106,6 +106,67 @@ static int bpf_qdisc_btf_struct_access(struct bpf_verifier_log *log,
> return 0;
> }
>
> +__bpf_kfunc_start_defs();
> +
> +/* bpf_skb_get_hash - Get the flow hash of an skb.
> + * @skb: The skb to get the flow hash from.
> + */
> +__bpf_kfunc u32 bpf_skb_get_hash(struct sk_buff *skb)
> +{
> + return skb_get_hash(skb);
> +}
> +
> +/* bpf_kfree_skb - Release an skb's reference and drop it immediately.
> + * @skb: The skb whose reference to be released and dropped.
> + */
> +__bpf_kfunc void bpf_kfree_skb(struct sk_buff *skb)
> +{
> + kfree_skb(skb);
> +}
> +
> +/* bpf_qdisc_skb_drop - Drop an skb by adding it to a deferred free list.
> + * @skb: The skb whose reference to be released and dropped.
> + * @to_free_list: The list of skbs to be dropped.
> + */
> +__bpf_kfunc void bpf_qdisc_skb_drop(struct sk_buff *skb,
> + struct bpf_sk_buff_ptr *to_free_list)
> +{
> + __qdisc_drop(skb, (struct sk_buff **)to_free_list);
> +}
> +
> +__bpf_kfunc_end_defs();
> +
> +#define BPF_QDISC_KFUNC_xxx \
> + BPF_QDISC_KFUNC(bpf_skb_get_hash, KF_TRUSTED_ARGS) \
> + BPF_QDISC_KFUNC(bpf_kfree_skb, KF_RELEASE) \
> + BPF_QDISC_KFUNC(bpf_qdisc_skb_drop, KF_RELEASE) \
> +
> +BTF_KFUNCS_START(bpf_qdisc_kfunc_ids)
> +#define BPF_QDISC_KFUNC(name, flag) BTF_ID_FLAGS(func, name, flag)
> +BPF_QDISC_KFUNC_xxx
> +#undef BPF_QDISC_KFUNC
> +BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS)
> +BTF_KFUNCS_END(bpf_qdisc_kfunc_ids)
> +
> +#define BPF_QDISC_KFUNC(name, _) BTF_ID_LIST_SINGLE(name##_ids, func, name)
> +BPF_QDISC_KFUNC_xxx
> +#undef BPF_QDISC_KFUNC
> +
> +static int bpf_qdisc_kfunc_filter(const struct bpf_prog *prog, u32 kfunc_id)
> +{
> + if (kfunc_id == bpf_qdisc_skb_drop_ids[0])
> + if (strcmp(prog->aux->attach_func_name, "enqueue"))
The kfunc is registered for all BPF_PROG_TYPE_STRUCT_OPS programs, so checking
the func_name alone is not enough, e.g. another future struct_ops may also have
an "enqueue" ops. Checking the btf type of "struct Qdisc_ops" is better.
Something like the following (untested):
diff --git i/include/linux/bpf.h w/include/linux/bpf.h
index c81ac98db439..cf3133f81e7f 100644
--- i/include/linux/bpf.h
+++ w/include/linux/bpf.h
@@ -1809,6 +1809,7 @@ struct bpf_struct_ops {
void *cfi_stubs;
struct module *owner;
const char *name;
+ const struct btf_type *type;
struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
};
diff --git i/kernel/bpf/bpf_struct_ops.c w/kernel/bpf/bpf_struct_ops.c
index d9e0af00580b..5c2ca5a84384 100644
--- i/kernel/bpf/bpf_struct_ops.c
+++ w/kernel/bpf/bpf_struct_ops.c
@@ -432,6 +432,8 @@ int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc
*st_ops_desc,
goto errout;
}
+ st_ops->type = t;
+
return 0;
errout:
diff --git i/net/sched/bpf_qdisc.c w/net/sched/bpf_qdisc.c
index 1caa9f696d2d..94e45ea59fef 100644
--- i/net/sched/bpf_qdisc.c
+++ w/net/sched/bpf_qdisc.c
@@ -250,6 +250,11 @@ BPF_QDISC_KFUNC_xxx
static int bpf_qdisc_kfunc_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
+
+ if (bpf_Qdisc_ops.type != btf_type_by_id(prog->aux->attach_btf,
+ prog->aux->attach_btf_id))
+ return -EACCES;
+
if (kfunc_id == bpf_qdisc_skb_drop_ids[0]) {
if (strcmp(prog->aux->attach_func_name, "enqueue"))
return -EACCES;
st_ops->type (and a few other members) was moved into bpf_struct_ops_desc when
kernel module support was added. I think adding st_ops->type back should be enough.
Also, a bit of bikeshedding here: looking at patch 7 and patch 8, which limit a
set of kfuncs to a particular ops, I think using btf_id_set_contains() is more
in line with other verifier usages. For example:
BTF_SET_START(qdisc_enqueue_kfunc_set)
BTF_ID(func, bpf_qdisc_skb_drop)
BTF_ID(func, bpf_qdisc_watchdog_schedule)
BTF_SET_END(qdisc_enqueue_kfunc_set)
BTF_SET_START(qdisc_dequeue_kfunc_set)
BTF_ID(func, bpf_qdisc_bstats_update)
BTF_ID(func, bpf_qdisc_watchdog_schedule)
BTF_SET_END(qdisc_dequeue_kfunc_set)
BTF_SET_START(qdisc_common_kfunc_set)
BTF_ID(func, bpf_skb_get_hash)
BTF_ID(func, bpf_kfree_skb)
BTF_SET_END(qdisc_common_kfunc_set)
> + return -EACCES;
> +
> + return 0;
> +}
> +
> +static const struct btf_kfunc_id_set bpf_qdisc_kfunc_set = {
> + .owner = THIS_MODULE,
> + .set = &bpf_qdisc_kfunc_ids,
> + .filter = bpf_qdisc_kfunc_filter,
> +};
> +
> static const struct bpf_verifier_ops bpf_qdisc_verifier_ops = {
> .get_func_proto = bpf_qdisc_get_func_proto,
> .is_valid_access = bpf_qdisc_is_valid_access,
> @@ -209,6 +270,20 @@ static struct bpf_struct_ops bpf_Qdisc_ops = {
>
> static int __init bpf_qdisc_kfunc_init(void)
> {
> - return register_bpf_struct_ops(&bpf_Qdisc_ops, Qdisc_ops);
> + int ret;
> + const struct btf_id_dtor_kfunc skb_kfunc_dtors[] = {
> + {
> + .btf_id = bpf_sk_buff_ids[0],
> + .kfunc_btf_id = bpf_kfree_skb_ids[0]
> + },
> + };
> +
> + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_qdisc_kfunc_set);
> + ret = ret ?: register_btf_id_dtor_kfuncs(skb_kfunc_dtors,
> + ARRAY_SIZE(skb_kfunc_dtors),
> + THIS_MODULE);
> + ret = ret ?: register_bpf_struct_ops(&bpf_Qdisc_ops, Qdisc_ops);
> +
> + return ret;
> }
> late_initcall(bpf_qdisc_kfunc_init);
next prev parent reply other threads:[~2024-12-19 7:37 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-13 23:29 [PATCH bpf-next v1 00/13] bpf qdisc Amery Hung
2024-12-13 23:29 ` [PATCH bpf-next v1 01/13] bpf: Support getting referenced kptr from struct_ops argument Amery Hung
2024-12-18 0:58 ` Martin KaFai Lau
2024-12-18 1:24 ` Alexei Starovoitov
2024-12-18 16:09 ` Amery Hung
2024-12-18 17:20 ` Alexei Starovoitov
2024-12-18 1:44 ` Jakub Kicinski
2024-12-18 16:57 ` Amery Hung
2024-12-19 23:06 ` Martin KaFai Lau
2024-12-13 23:29 ` [PATCH bpf-next v1 02/13] selftests/bpf: Test referenced kptr arguments of struct_ops programs Amery Hung
2024-12-18 1:17 ` Martin KaFai Lau
2024-12-18 16:10 ` Amery Hung
2024-12-19 3:40 ` Yonghong Song
2024-12-19 20:49 ` Amery Hung
2024-12-13 23:29 ` [PATCH bpf-next v1 03/13] bpf: Allow struct_ops prog to return referenced kptr Amery Hung
2024-12-18 22:29 ` Martin KaFai Lau
2024-12-13 23:29 ` [PATCH bpf-next v1 04/13] selftests/bpf: Test returning referenced kptr from struct_ops programs Amery Hung
2024-12-13 23:29 ` [PATCH bpf-next v1 05/13] bpf: net_sched: Support implementation of Qdisc_ops in bpf Amery Hung
2024-12-14 4:51 ` Cong Wang
2024-12-18 23:37 ` Martin KaFai Lau
2024-12-13 23:29 ` [PATCH bpf-next v1 06/13] bpf: net_sched: Add basic bpf qdisc kfuncs Amery Hung
2024-12-18 17:11 ` Amery Hung
2024-12-19 7:37 ` Martin KaFai Lau [this message]
2024-12-20 0:32 ` Amery Hung
2024-12-13 23:29 ` [PATCH bpf-next v1 07/13] bpf: net_sched: Add a qdisc watchdog timer Amery Hung
2024-12-19 1:16 ` Martin KaFai Lau
2024-12-20 19:24 ` Amery Hung
2024-12-13 23:29 ` [PATCH bpf-next v1 08/13] bpf: net_sched: Support updating bstats Amery Hung
2024-12-13 23:29 ` [PATCH bpf-next v1 09/13] bpf: net_sched: Support updating qstats Amery Hung
2024-12-13 23:29 ` [PATCH bpf-next v1 10/13] bpf: net_sched: Allow writing to more Qdisc members Amery Hung
2024-12-13 23:29 ` [PATCH bpf-next v1 11/13] libbpf: Support creating and destroying qdisc Amery Hung
2024-12-17 18:32 ` Andrii Nakryiko
2024-12-17 19:08 ` Amery Hung
2024-12-13 23:29 ` [PATCH bpf-next v1 12/13] selftests: Add a basic fifo qdisc test Amery Hung
2024-12-13 23:29 ` [PATCH bpf-next v1 13/13] selftests: Add a bpf fq qdisc to selftest Amery Hung
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=fd856afb-7ff5-4928-8ba1-22e68c0913e7@linux.dev \
--to=martin.lau@linux.dev \
--cc=alexei.starovoitov@gmail.com \
--cc=amery.hung@bytedance.com \
--cc=ameryhung@gmail.com \
--cc=andrii@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=ekarani.silvestre@ccc.ufcg.edu.br \
--cc=jhs@mojatatu.com \
--cc=jiri@resnulli.us \
--cc=martin.lau@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=sinquersw@gmail.com \
--cc=stfomichev@gmail.com \
--cc=toke@redhat.com \
--cc=xiyou.wangcong@gmail.com \
--cc=yangpeihao@sjtu.edu.cn \
--cc=yepeilin.cs@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.