From: Martin KaFai Lau <martin.lau@linux.dev>
To: Eyal Birger <eyal.birger@gmail.com>
Cc: netdev@vger.kernel.org, bpf@vger.kernel.org,
linux-kselftest@vger.kernel.org, davem@davemloft.net,
edumazet@google.com, kuba@kernel.org, pabeni@redhat.com,
steffen.klassert@secunet.com, herbert@gondor.apana.org.au,
andrii@kernel.org, daniel@iogearbox.net,
nicolas.dichtel@6wind.com, razor@blackwall.org, mykolal@fb.com,
ast@kernel.org, song@kernel.org, yhs@fb.com,
john.fastabend@gmail.com, kpsingh@kernel.org, sdf@google.com,
haoluo@google.com, jolsa@kernel.org, shuah@kernel.org
Subject: Re: [PATCH ipsec-next 2/3] xfrm: interface: Add unstable helpers for setting/getting XFRM metadata from TC-BPF
Date: Mon, 28 Nov 2022 17:58:23 -0800 [thread overview]
Message-ID: <c8a2d940-ff85-c952-74d0-25ad2c33c1af@linux.dev> (raw)
In-Reply-To: <20221128160501.769892-3-eyal.birger@gmail.com>
On 11/28/22 8:05 AM, Eyal Birger wrote:
> This change adds xfrm metadata helpers using the unstable kfunc call
> interface for the TC-BPF hooks. This allows steering traffic towards
> different IPsec connections based on logic implemented in bpf programs.
>
> This object is built based on the availability of BTF debug info.
>
> The metadata percpu dsts used on TX take ownership of the original skb
> dsts so that they may be used as part of the xfrm transmission logic -
> e.g. for MTU calculations.
A few quick comments and questions:
>
> Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
> ---
> include/net/dst_metadata.h | 1 +
> include/net/xfrm.h | 20 ++++++++
> net/core/dst.c | 4 ++
> net/xfrm/Makefile | 6 +++
> net/xfrm/xfrm_interface_bpf.c | 92 ++++++++++++++++++++++++++++++++++
Please tag for bpf-next
> net/xfrm/xfrm_interface_core.c | 15 ++++++
> 6 files changed, 138 insertions(+)
> create mode 100644 net/xfrm/xfrm_interface_bpf.c
>
> diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
> index a454cf4327fe..1b7fae4c6b24 100644
> --- a/include/net/dst_metadata.h
> +++ b/include/net/dst_metadata.h
> @@ -26,6 +26,7 @@ struct macsec_info {
> struct xfrm_md_info {
> u32 if_id;
> int link;
> + struct dst_entry *dst_orig;
> };
>
> struct metadata_dst {
[ ... ]
> diff --git a/net/core/dst.c b/net/core/dst.c
> index bc9c9be4e080..4c2eb7e56dab 100644
> --- a/net/core/dst.c
> +++ b/net/core/dst.c
> @@ -315,6 +315,8 @@ void metadata_dst_free(struct metadata_dst *md_dst)
> #ifdef CONFIG_DST_CACHE
> if (md_dst->type == METADATA_IP_TUNNEL)
> dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
> + else if (md_dst->type == METADATA_XFRM)
> + dst_release(md_dst->u.xfrm_info.dst_orig);
Why only release dst_orig under CONFIG_DST_CACHE?
> #endif
> kfree(md_dst);
> }
> @@ -348,6 +350,8 @@ void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst)
>
> if (one_md_dst->type == METADATA_IP_TUNNEL)
> dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache);
> + else if (one_md_dst->type == METADATA_XFRM)
> + dst_release(one_md_dst->u.xfrm_info.dst_orig);
Same here.
[ ... ]
> diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c
> new file mode 100644
> index 000000000000..d3997ab7cc28
> --- /dev/null
> +++ b/net/xfrm/xfrm_interface_bpf.c
> @@ -0,0 +1,92 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Unstable XFRM Helpers for TC-BPF hook
> + *
> + * These are called from SCHED_CLS BPF programs. Note that it is
> + * allowed to break compatibility for these functions since the interface they
> + * are exposed through to BPF programs is explicitly unstable.
> + */
> +
> +#include <linux/bpf.h>
> +#include <linux/btf_ids.h>
> +
> +#include <net/dst_metadata.h>
> +#include <net/xfrm.h>
> +
> +struct bpf_xfrm_info {
> + u32 if_id;
> + int link;
> +};
> +
> +static struct metadata_dst __percpu *xfrm_md_dst;
> +__diag_push();
> +__diag_ignore_all("-Wmissing-prototypes",
> + "Global functions as their definitions will be in xfrm_interface BTF");
> +
> +__used noinline
> +int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to)
> +{
> + struct sk_buff *skb = (struct sk_buff *)skb_ctx;
> + struct xfrm_md_info *info;
> +
> + memset(to, 0, sizeof(*to));
> +
> + info = skb_xfrm_md_info(skb);
> + if (!info)
> + return -EINVAL;
> +
> + to->if_id = info->if_id;
> + to->link = info->link;
> + return 0;
> +}
> +
> +__used noinline
> +int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx,
> + const struct bpf_xfrm_info *from)
> +{
> + struct sk_buff *skb = (struct sk_buff *)skb_ctx;
> + struct metadata_dst *md_dst;
> + struct xfrm_md_info *info;
> +
> + if (unlikely(skb_metadata_dst(skb)))
> + return -EINVAL;
> +
> + md_dst = this_cpu_ptr(xfrm_md_dst);
> +
> + info = &md_dst->u.xfrm_info;
> + memset(info, 0, sizeof(*info));
> +
> + info->if_id = from->if_id;
> + info->link = from->link;
> + info->dst_orig = skb_dst(skb);
However, the dst_orig init is not done under CONFIG_DST_CACHE...
Also, is it possible that skb->_skb_refdst has SKB_DST_NOREF set and later below
... (contd)
> +
> + dst_hold((struct dst_entry *)md_dst);
> + skb_dst_set(skb, (struct dst_entry *)md_dst);
> + return 0;
> +}
> +
> +__diag_pop()
> +
> +BTF_SET8_START(xfrm_ifc_kfunc_set)
> +BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info)
> +BTF_ID_FLAGS(func, bpf_skb_set_xfrm_info)
> +BTF_SET8_END(xfrm_ifc_kfunc_set)
> +
> +static const struct btf_kfunc_id_set xfrm_interface_kfunc_set = {
> + .owner = THIS_MODULE,
> + .set = &xfrm_ifc_kfunc_set,
> +};
> +
> +int __init register_xfrm_interface_bpf(void)
> +{
> + xfrm_md_dst = metadata_dst_alloc_percpu(0, METADATA_XFRM,
> + GFP_KERNEL);
> + if (!xfrm_md_dst)
> + return -ENOMEM;
> + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
> + &xfrm_interface_kfunc_set);
Will cleanup_xfrm_interface_bpf() be called on error? If
register_btf_kfunc_id_set() fails, xfrm_md_dst does not appear to be freed.
> +}
> +
> +void __exit cleanup_xfrm_interface_bpf(void)
> +{
> + metadata_dst_free_percpu(xfrm_md_dst);
> +}
> diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
> index 5a67b120c4db..1e1e8e965939 100644
> --- a/net/xfrm/xfrm_interface_core.c
> +++ b/net/xfrm/xfrm_interface_core.c
> @@ -396,6 +396,14 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
>
> if_id = md_info->if_id;
> fl->flowi_oif = md_info->link;
> + if (md_info->dst_orig) {
> + struct dst_entry *tmp_dst = dst;
> +
> + dst = md_info->dst_orig;
> + skb_dst_set(skb, dst);
(contd) ... skb_dst_set() is always called here (considering that
skb_dst_set_noref() also exists).
> + md_info->dst_orig = NULL;
> + dst_release(tmp_dst);
> + }
> } else {
> if_id = xi->p.if_id;
> }
> @@ -1162,12 +1170,18 @@ static int __init xfrmi_init(void)
> if (err < 0)
> goto rtnl_link_failed;
>
> + err = register_xfrm_interface_bpf();
> + if (err < 0)
> + goto kfunc_failed;
> +
> lwtunnel_encap_add_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM);
>
> xfrm_if_register_cb(&xfrm_if_cb);
>
> return err;
>
> +kfunc_failed:
> + rtnl_link_unregister(&xfrmi_link_ops);
> rtnl_link_failed:
> xfrmi6_fini();
> xfrmi6_failed:
> @@ -1183,6 +1197,7 @@ static void __exit xfrmi_fini(void)
> {
> xfrm_if_unregister_cb();
> lwtunnel_encap_del_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM);
> + cleanup_xfrm_interface_bpf();
> rtnl_link_unregister(&xfrmi_link_ops);
> xfrmi4_fini();
> xfrmi6_fini();
next prev parent reply other threads:[~2022-11-29 1:58 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-28 16:04 [PATCH ipsec-next 0/3] xfrm: interface: Add unstable helpers for XFRM metadata Eyal Birger
2022-11-28 16:04 ` [PATCH ipsec-next 1/3] xfrm: interface: rename xfrm_interface.c to xfrm_interface_core.c Eyal Birger
2022-11-28 16:05 ` [PATCH ipsec-next 2/3] xfrm: interface: Add unstable helpers for setting/getting XFRM metadata from TC-BPF Eyal Birger
2022-11-29 1:58 ` Martin KaFai Lau [this message]
2022-11-29 8:36 ` Eyal Birger
2022-11-29 9:50 ` Steffen Klassert
2022-11-29 16:15 ` Jakub Kicinski
2022-11-30 8:39 ` Steffen Klassert
2022-11-30 19:10 ` Martin KaFai Lau
2022-12-01 7:12 ` Steffen Klassert
2022-11-28 16:05 ` [PATCH ipsec-next 3/3] selftests/bpf: add xfrm_info tests Eyal Birger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=c8a2d940-ff85-c952-74d0-25ad2c33c1af@linux.dev \
--to=martin.lau@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=eyal.birger@gmail.com \
--cc=haoluo@google.com \
--cc=herbert@gondor.apana.org.au \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=kuba@kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=mykolal@fb.com \
--cc=netdev@vger.kernel.org \
--cc=nicolas.dichtel@6wind.com \
--cc=pabeni@redhat.com \
--cc=razor@blackwall.org \
--cc=sdf@google.com \
--cc=shuah@kernel.org \
--cc=song@kernel.org \
--cc=steffen.klassert@secunet.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).