netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Martin KaFai Lau <martin.lau@linux.dev>
To: Amery Hung <ameryhung@gmail.com>
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org,
	yangpeihao@sjtu.edu.cn, daniel@iogearbox.net, andrii@kernel.org,
	alexei.starovoitov@gmail.com, martin.lau@kernel.org,
	sinquersw@gmail.com, toke@redhat.com, jhs@mojatatu.com,
	jiri@resnulli.us, sdf@google.com, xiyou.wangcong@gmail.com,
	yepeilin.cs@gmail.com
Subject: Re: [RFC PATCH v9 07/11] bpf: net_sched: Allow more optional operators in Qdisc_ops
Date: Thu, 25 Jul 2024 18:15:37 -0700	[thread overview]
Message-ID: <f3bfe9a5-40e8-4a1c-a5e5-0f7f24b9e395@linux.dev> (raw)
In-Reply-To: <20240714175130.4051012-8-amery.hung@bytedance.com>

On 7/14/24 10:51 AM, Amery Hung wrote:
> So far, init, reset, and destroy are implemented by bpf qdisc infra as
> fixed operators that manipulate the watchdog according to the occasion.
> This patch allows users to implement these three operators to perform
> desired work alongside the predefined ones.
> 
> Signed-off-by: Amery Hung <amery.hung@bytedance.com>
> ---
>   include/net/sch_generic.h |  6 ++++++
>   net/sched/bpf_qdisc.c     | 20 ++++----------------
>   net/sched/sch_api.c       | 11 +++++++++++
>   net/sched/sch_generic.c   |  8 ++++++++
>   4 files changed, 29 insertions(+), 16 deletions(-)
> 
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 214ed2e34faa..3041782b7527 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -1359,4 +1359,10 @@ static inline void qdisc_synchronize(const struct Qdisc *q)
>   		msleep(1);
>   }
>   
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> +int bpf_qdisc_init_pre_op(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack);
> +void bpf_qdisc_destroy_post_op(struct Qdisc *sch);
> +void bpf_qdisc_reset_post_op(struct Qdisc *sch);
> +#endif
> +
>   #endif
> diff --git a/net/sched/bpf_qdisc.c b/net/sched/bpf_qdisc.c
> index eff7559aa346..903b4eb54510 100644
> --- a/net/sched/bpf_qdisc.c
> +++ b/net/sched/bpf_qdisc.c
> @@ -9,9 +9,6 @@
>   static struct bpf_struct_ops bpf_Qdisc_ops;
>   
>   static u32 unsupported_ops[] = {
> -	offsetof(struct Qdisc_ops, init),
> -	offsetof(struct Qdisc_ops, reset),
> -	offsetof(struct Qdisc_ops, destroy),
>   	offsetof(struct Qdisc_ops, change),
>   	offsetof(struct Qdisc_ops, attach),
>   	offsetof(struct Qdisc_ops, change_real_num_tx),
> @@ -36,8 +33,8 @@ static int bpf_qdisc_init(struct btf *btf)
>   	return 0;
>   }
>   
> -static int bpf_qdisc_init_op(struct Qdisc *sch, struct nlattr *opt,
> -			     struct netlink_ext_ack *extack)
> +int bpf_qdisc_init_pre_op(struct Qdisc *sch, struct nlattr *opt,
> +			  struct netlink_ext_ack *extack)
>   {
>   	struct bpf_sched_data *q = qdisc_priv(sch);
>   
> @@ -45,14 +42,14 @@ static int bpf_qdisc_init_op(struct Qdisc *sch, struct nlattr *opt,
>   	return 0;
>   }
>   
> -static void bpf_qdisc_reset_op(struct Qdisc *sch)
> +void bpf_qdisc_reset_post_op(struct Qdisc *sch)
>   {
>   	struct bpf_sched_data *q = qdisc_priv(sch);
>   
>   	qdisc_watchdog_cancel(&q->watchdog);
>   }
>   
> -static void bpf_qdisc_destroy_op(struct Qdisc *sch)
> +void bpf_qdisc_destroy_post_op(struct Qdisc *sch)

The reset_post_ops and destroy_post_op are identical. They only do 
qdisc_watchdog_cancel().

>   {
>   	struct bpf_sched_data *q = qdisc_priv(sch);
>   
> @@ -235,15 +232,6 @@ static int bpf_qdisc_init_member(const struct btf_type *t,
>   			return -EINVAL;
>   		qdisc_ops->static_flags = TCQ_F_BPF;
>   		return 1;
> -	case offsetof(struct Qdisc_ops, init):
> -		qdisc_ops->init = bpf_qdisc_init_op;
> -		return 1;
> -	case offsetof(struct Qdisc_ops, reset):
> -		qdisc_ops->reset = bpf_qdisc_reset_op;
> -		return 1;
> -	case offsetof(struct Qdisc_ops, destroy):
> -		qdisc_ops->destroy = bpf_qdisc_destroy_op;
> -		return 1;
>   	case offsetof(struct Qdisc_ops, peek):
>   		if (!uqdisc_ops->peek)
>   			qdisc_ops->peek = qdisc_peek_dequeued;
> diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> index 5064b6d2d1ec..9fb9375e2793 100644
> --- a/net/sched/sch_api.c
> +++ b/net/sched/sch_api.c
> @@ -1352,6 +1352,13 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
>   		rcu_assign_pointer(sch->stab, stab);
>   	}
>   
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> +	if (sch->flags & TCQ_F_BPF) {

I can see the reason why this patch is needed. It is a few line changes and they 
are not in the fast path... still weakly not excited about them but I know it 
could be a personal preference.

I think at the very least, instead of adding a new TCQ_F_BPF, let see if the 
"owner == BPF_MODULE_OWNER" test can be reused like how it is done in the 
bpf_try_module_get().


A rough direction I am spinning...

The pre/post is mainly to initialize and cleanup the "struct bpf_sched_data" 
before/after calling the bpf prog.

For the pre (init), there is a ".gen_prologue(...., const struct bpf_prog 
*prog)" in the "bpf_verifier_ops". Take a look at the tc_cls_act_prologue().
It calls a BPF_FUNC_skb_pull_data helper. It potentially can call a kfunc 
bpf_qdisc_watchdog_cancel. However, the gen_prologue is invoked too late in the 
verifier for kfunc calling now. This will need some thoughts and works.

For the post (destroy,reset), there is no "gen_epilogue" now. If 
bpf_qdisc_watchdog_schedule() is not allowed to be called in the ".reset" and 
".destroy" bpf prog. I think it can be changed to pre also? There is a ".filter" 
function in the "struct btf_kfunc_id_set" during the kfunc register.

> +		err = bpf_qdisc_init_pre_op(sch, tca[TCA_OPTIONS], extack);
> +		if (err != 0)
> +			goto err_out4;
> +	}
> +#endif
>   	if (ops->init) {
>   		err = ops->init(sch, tca[TCA_OPTIONS], extack);
>   		if (err != 0)
> @@ -1388,6 +1395,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
>   	 */
>   	if (ops->destroy)
>   		ops->destroy(sch);
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> +	if (sch->flags & TCQ_F_BPF)
> +		bpf_qdisc_destroy_post_op(sch);
> +#endif
>   	qdisc_put_stab(rtnl_dereference(sch->stab));
>   err_out3:
>   	lockdep_unregister_key(&sch->root_lock_key);
> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> index 76e4a6efd17c..0ac05665c69f 100644
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -1033,6 +1033,10 @@ void qdisc_reset(struct Qdisc *qdisc)
>   
>   	if (ops->reset)
>   		ops->reset(qdisc);
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> +	if (qdisc->flags & TCQ_F_BPF)
> +		bpf_qdisc_reset_post_op(qdisc);
> +#endif
>   
>   	__skb_queue_purge(&qdisc->gso_skb);
>   	__skb_queue_purge(&qdisc->skb_bad_txq);
> @@ -1076,6 +1080,10 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
>   
>   	if (ops->destroy)
>   		ops->destroy(qdisc);
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_BPF_JIT)
> +	if (qdisc->flags & TCQ_F_BPF)
> +		bpf_qdisc_destroy_post_op(qdisc);
> +#endif
>   
>   	lockdep_unregister_key(&qdisc->root_lock_key);
>   	bpf_module_put(ops, ops->owner);


  parent reply	other threads:[~2024-07-26  1:15 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-14 17:51 [RFC PATCH v9 00/11] bpf qdisc Amery Hung
2024-07-14 17:51 ` [RFC PATCH v9 01/11] bpf: Support getting referenced kptr from struct_ops argument Amery Hung
2024-07-24  0:32   ` Martin KaFai Lau
2024-07-24 17:00     ` Amery Hung
2024-07-25  1:28       ` Martin KaFai Lau
2024-07-14 17:51 ` [RFC PATCH v9 02/11] selftests/bpf: Test referenced kptr arguments of struct_ops programs Amery Hung
2024-07-14 17:51 ` [RFC PATCH v9 03/11] bpf: Allow struct_ops prog to return referenced kptr Amery Hung
2024-07-24  5:36   ` Kui-Feng Lee
2024-07-24 18:27     ` Kui-Feng Lee
2024-07-24 20:44     ` Amery Hung
2024-07-26 18:22       ` Kui-Feng Lee
2024-07-26 22:45         ` Amery Hung
2024-07-24 23:57   ` Martin KaFai Lau
2024-07-14 17:51 ` [RFC PATCH v9 04/11] selftests/bpf: Test returning referenced kptr from struct_ops programs Amery Hung
2024-07-14 17:51 ` [RFC PATCH v9 05/11] bpf: net_sched: Support implementation of Qdisc_ops in bpf Amery Hung
2024-07-18  0:00   ` Amery Hung
2024-07-25 21:24   ` Martin KaFai Lau
2024-07-31  4:09     ` Amery Hung
2024-07-14 17:51 ` [RFC PATCH v9 06/11] bpf: net_sched: Add bpf qdisc kfuncs Amery Hung
2024-07-25 22:38   ` Martin KaFai Lau
2024-07-31  4:08     ` Amery Hung
2024-07-14 17:51 ` [RFC PATCH v9 07/11] bpf: net_sched: Allow more optional operators in Qdisc_ops Amery Hung
2024-07-18  0:01   ` Amery Hung
2024-07-26  1:15   ` Martin KaFai Lau [this message]
2024-07-26 18:30     ` Martin KaFai Lau
2024-07-26 22:30     ` Amery Hung
2024-07-30  0:20       ` Martin KaFai Lau
2024-07-14 17:51 ` [RFC PATCH v9 08/11] libbpf: Support creating and destroying qdisc Amery Hung
2024-07-14 17:51 ` [RFC PATCH v9 09/11] selftests: Add a basic fifo qdisc test Amery Hung
2024-07-14 17:51 ` [RFC PATCH v9 10/11] selftests: Add a bpf fq qdisc to selftest Amery Hung
2024-07-19  1:54   ` Martin KaFai Lau
2024-07-19 18:20     ` Amery Hung
2024-07-14 17:51 ` [RFC PATCH v9 11/11] selftests: Add a bpf netem " Amery Hung
2024-07-17 10:13 ` [RFC PATCH v9 00/11] bpf qdisc Donald Hunter
2024-07-17 22:04   ` Amery Hung
2024-07-19 17:21 ` [OFFLIST RFC 1/4] bpf: Search for kptrs in prog BTF structs Amery Hung
2024-07-19 17:21   ` [OFFLIST RFC 2/4] bpf: Rename ARG_PTR_TO_KPTR -> ARG_KPTR_XCHG_DEST Amery Hung
2024-07-19 17:21   ` [OFFLIST RFC 3/4] bpf: Support bpf_kptr_xchg into local kptr Amery Hung
2024-07-23  0:18     ` Alexei Starovoitov
2024-07-24  0:08       ` Amery Hung
2024-07-19 17:21   ` [OFFLIST RFC 4/4] selftests/bpf: Test bpf_kptr_xchg stashing " Amery Hung
2024-07-23  0:19 ` [RFC PATCH v9 00/11] bpf qdisc Alexei Starovoitov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f3bfe9a5-40e8-4a1c-a5e5-0f7f24b9e395@linux.dev \
    --to=martin.lau@linux.dev \
    --cc=alexei.starovoitov@gmail.com \
    --cc=ameryhung@gmail.com \
    --cc=andrii@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=martin.lau@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=sdf@google.com \
    --cc=sinquersw@gmail.com \
    --cc=toke@redhat.com \
    --cc=xiyou.wangcong@gmail.com \
    --cc=yangpeihao@sjtu.edu.cn \
    --cc=yepeilin.cs@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).