* [PATCH mptcp-next v1 1/4] bpf: Add mptcp path manager struct_ops
2025-03-21 1:49 [PATCH mptcp-next v1 0/4] BPF path manager, part 7 Geliang Tang
@ 2025-03-21 1:49 ` Geliang Tang
2025-03-21 10:59 ` Matthieu Baerts
2025-03-24 10:26 ` Matthieu Baerts
2025-03-21 1:49 ` [PATCH mptcp-next v1 2/4] bpf: Export mptcp path manager kfuncs Geliang Tang
` (4 subsequent siblings)
5 siblings, 2 replies; 15+ messages in thread
From: Geliang Tang @ 2025-03-21 1:49 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
From: Geliang Tang <tanggeliang@kylinos.cn>
This patch implements a new struct bpf_struct_ops for MPTCP BPF path
manager: bpf_mptcp_pm_ops. Register and unregister the bpf path manager
in .reg and .unreg.
Add write access for some fields of struct mptcp_sock and struct
mptcp_pm_addr_entry in .btf_struct_access.
This MPTCP BPF path manager implementation is similar to BPF TCP CC. And
net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch.
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
net/mptcp/bpf.c | 259 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 258 insertions(+), 1 deletion(-)
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index 2b0cfb57df8c..596574102b89 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -17,10 +17,266 @@
#include "protocol.h"
#ifdef CONFIG_BPF_JIT
-static struct bpf_struct_ops bpf_mptcp_sched_ops;
+static struct bpf_struct_ops bpf_mptcp_pm_ops,
+ bpf_mptcp_sched_ops;
static u32 mptcp_sock_id,
+ mptcp_entry_id,
mptcp_subflow_id;
+/* MPTCP BPF path manager */
+
+static const struct bpf_func_proto *
+bpf_mptcp_pm_get_func_proto(enum bpf_func_id func_id,
+ const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_sk_storage_get:
+ return &bpf_sk_storage_get_proto;
+ case BPF_FUNC_sk_storage_delete:
+ return &bpf_sk_storage_delete_proto;
+ default:
+ return bpf_base_func_proto(func_id, prog);
+ }
+}
+
+static int bpf_mptcp_pm_btf_struct_access(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off, int size)
+{
+ u32 id = reg->btf_id;
+ size_t end;
+
+ if (id == mptcp_sock_id) {
+ switch (off) {
+ case offsetof(struct mptcp_sock, pm.remote.id):
+ end = offsetofend(struct mptcp_sock, pm.remote.id);
+ break;
+ case offsetof(struct mptcp_sock, pm.remote.family):
+ end = offsetofend(struct mptcp_sock, pm.remote.family);
+ break;
+ case offsetof(struct mptcp_sock, pm.remote.port):
+ end = offsetofend(struct mptcp_sock, pm.remote.port);
+ break;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ case offsetof(struct mptcp_sock, pm.remote.addr6.s6_addr32[0]):
+ end = offsetofend(struct mptcp_sock, pm.remote.addr6.s6_addr32[0]);
+ break;
+ case offsetof(struct mptcp_sock, pm.remote.addr6.s6_addr32[1]):
+ end = offsetofend(struct mptcp_sock, pm.remote.addr6.s6_addr32[1]);
+ break;
+ case offsetof(struct mptcp_sock, pm.remote.addr6.s6_addr32[2]):
+ end = offsetofend(struct mptcp_sock, pm.remote.addr6.s6_addr32[2]);
+ break;
+ case offsetof(struct mptcp_sock, pm.remote.addr6.s6_addr32[3]):
+ end = offsetofend(struct mptcp_sock, pm.remote.addr6.s6_addr32[3]);
+ break;
+#else
+ case offsetof(struct mptcp_sock, pm.remote.addr.s_addr):
+ end = offsetofend(struct mptcp_sock, pm.remote.addr.s_addr);
+ break;
+#endif
+ case offsetof(struct mptcp_sock, pm.work_pending):
+ end = offsetofend(struct mptcp_sock, pm.work_pending);
+ break;
+ case offsetof(struct mptcp_sock, pm.accept_addr):
+ end = offsetofend(struct mptcp_sock, pm.accept_addr);
+ break;
+ case offsetof(struct mptcp_sock, pm.accept_subflow):
+ end = offsetofend(struct mptcp_sock, pm.accept_subflow);
+ break;
+ case offsetof(struct mptcp_sock, pm.add_addr_signaled):
+ end = offsetofend(struct mptcp_sock, pm.add_addr_signaled);
+ break;
+ case offsetof(struct mptcp_sock, pm.local_addr_used):
+ end = offsetofend(struct mptcp_sock, pm.local_addr_used);
+ break;
+ case offsetof(struct mptcp_sock, pm.subflows):
+ end = offsetofend(struct mptcp_sock, pm.subflows);
+ break;
+ default:
+ bpf_log(log, "no write support to mptcp_sock at off %d\n",
+ off);
+ return -EACCES;
+ }
+ } else if (id == mptcp_entry_id) {
+ switch (off) {
+ case offsetof(struct mptcp_pm_addr_entry, addr.id):
+ end = offsetofend(struct mptcp_pm_addr_entry, addr.id);
+ break;
+ case offsetof(struct mptcp_pm_addr_entry, addr.port):
+ end = offsetofend(struct mptcp_pm_addr_entry, addr.port);
+ break;
+ default:
+ bpf_log(log, "no write support to mptcp_pm_addr_entry at off %d\n",
+ off);
+ return -EACCES;
+ }
+ } else {
+ bpf_log(log, "only access to mptcp sock or addr or entry is supported\n");
+ return -EACCES;
+ }
+
+ if (off + size > end) {
+ bpf_log(log, "access beyond %s at off %u size %u ended at %zu",
+ id == mptcp_sock_id ? "mptcp_sock" :
+ (id == mptcp_entry_id ? "mptcp_pm_addr_entry" : "mptcp_addr_info"),
+ off, size, end);
+ return -EACCES;
+ }
+
+ return NOT_INIT;
+}
+
+static const struct bpf_verifier_ops bpf_mptcp_pm_verifier_ops = {
+ .get_func_proto = bpf_mptcp_pm_get_func_proto,
+ .is_valid_access = bpf_tracing_btf_ctx_access,
+ .btf_struct_access = bpf_mptcp_pm_btf_struct_access,
+};
+
+static int bpf_mptcp_pm_reg(void *kdata, struct bpf_link *link)
+{
+ return mptcp_pm_register(kdata);
+}
+
+static void bpf_mptcp_pm_unreg(void *kdata, struct bpf_link *link)
+{
+ mptcp_pm_unregister(kdata);
+}
+
+static int bpf_mptcp_pm_check_member(const struct btf_type *t,
+ const struct btf_member *member,
+ const struct bpf_prog *prog)
+{
+ return 0;
+}
+
+static int bpf_mptcp_pm_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ const struct mptcp_pm_ops *upm;
+ struct mptcp_pm_ops *pm;
+ u32 moff;
+
+ upm = (const struct mptcp_pm_ops *)udata;
+ pm = (struct mptcp_pm_ops *)kdata;
+
+ moff = __btf_member_bit_offset(t, member) / 8;
+ switch (moff) {
+ case offsetof(struct mptcp_pm_ops, name):
+ if (bpf_obj_name_cpy(pm->name, upm->name,
+ sizeof(pm->name)) <= 0)
+ return -EINVAL;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bpf_mptcp_pm_init(struct btf *btf)
+{
+ s32 type_id;
+
+ type_id = btf_find_by_name_kind(btf, "mptcp_sock",
+ BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ mptcp_sock_id = type_id;
+
+ type_id = btf_find_by_name_kind(btf, "mptcp_pm_addr_entry",
+ BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ mptcp_entry_id = type_id;
+
+ return 0;
+}
+
+static int bpf_mptcp_pm_validate(void *kdata)
+{
+ return mptcp_pm_validate(kdata);
+}
+
+static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *skc)
+{
+ return 0;
+}
+
+static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock *msk,
+ struct mptcp_addr_info *skc)
+{
+ return false;
+}
+
+static void __bpf_mptcp_pm_established(struct mptcp_sock *msk)
+{
+}
+
+static void __bpf_mptcp_pm_subflow_established(struct mptcp_sock *msk)
+{
+}
+
+static bool __bpf_mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
+{
+ return false;
+}
+
+static bool __bpf_mptcp_pm_accept_new_subflow(const struct mptcp_sock *msk)
+{
+ return false;
+}
+
+static bool __bpf_mptcp_pm_add_addr_echo(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr)
+{
+ return false;
+}
+
+static int __bpf_mptcp_pm_add_addr_received(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr)
+{
+ return 0;
+}
+
+static void __bpf_mptcp_pm_rm_addr_received(struct mptcp_sock *msk)
+{
+}
+
+static void __bpf_mptcp_pm_init(struct mptcp_sock *msk)
+{
+}
+
+static void __bpf_mptcp_pm_release(struct mptcp_sock *msk)
+{
+}
+
+static struct mptcp_pm_ops __bpf_mptcp_pm_ops = {
+ .get_local_id = __bpf_mptcp_pm_get_local_id,
+ .get_priority = __bpf_mptcp_pm_get_priority,
+ .established = __bpf_mptcp_pm_established,
+ .subflow_established = __bpf_mptcp_pm_subflow_established,
+ .allow_new_subflow = __bpf_mptcp_pm_allow_new_subflow,
+ .accept_new_subflow = __bpf_mptcp_pm_accept_new_subflow,
+ .add_addr_echo = __bpf_mptcp_pm_add_addr_echo,
+ .add_addr_received = __bpf_mptcp_pm_add_addr_received,
+ .rm_addr_received = __bpf_mptcp_pm_rm_addr_received,
+ .init = __bpf_mptcp_pm_init,
+ .release = __bpf_mptcp_pm_release,
+};
+
+static struct bpf_struct_ops bpf_mptcp_pm_ops = {
+ .verifier_ops = &bpf_mptcp_pm_verifier_ops,
+ .reg = bpf_mptcp_pm_reg,
+ .unreg = bpf_mptcp_pm_unreg,
+ .check_member = bpf_mptcp_pm_check_member,
+ .init_member = bpf_mptcp_pm_init_member,
+ .init = bpf_mptcp_pm_init,
+ .validate = bpf_mptcp_pm_validate,
+ .name = "mptcp_pm_ops",
+ .cfi_stubs = &__bpf_mptcp_pm_ops,
+};
+
/* MPTCP BPF packet scheduler */
static const struct bpf_func_proto *
@@ -332,6 +588,7 @@ static int __init bpf_mptcp_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
&bpf_mptcp_common_kfunc_set);
#ifdef CONFIG_BPF_JIT
+ ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_pm_ops, mptcp_pm_ops);
ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_sched_ops, mptcp_sched_ops);
#endif
--
2.43.0
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: [PATCH mptcp-next v1 1/4] bpf: Add mptcp path manager struct_ops
2025-03-21 1:49 ` [PATCH mptcp-next v1 1/4] bpf: Add mptcp path manager struct_ops Geliang Tang
@ 2025-03-21 10:59 ` Matthieu Baerts
2025-03-24 10:26 ` Matthieu Baerts
1 sibling, 0 replies; 15+ messages in thread
From: Matthieu Baerts @ 2025-03-21 10:59 UTC (permalink / raw)
To: Geliang Tang, mptcp; +Cc: Geliang Tang
Hi Geliang,
On 21/03/2025 02:49, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
>
> This patch implements a new struct bpf_struct_ops for MPTCP BPF path
> manager: bpf_mptcp_pm_ops. Register and unregister the bpf path manager
> in .reg and .unreg.
>
> Add write access for some fields of struct mptcp_sock and struct
> mptcp_pm_addr_entry in .btf_struct_access.
>
> This MPTCP BPF path manager implementation is similar to BPF TCP CC. And
> net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch.
(...)
> +static int bpf_mptcp_pm_btf_struct_access(struct bpf_verifier_log *log,
> + const struct bpf_reg_state *reg,
> + int off, int size)
I don't know how it works exactly, but with BPF, can we not force a
program to automatically take a lock (pm->lock) when trying to modify
any of the fields below?
Also, is there really a need for a BPF PM to modify any of these fields
directly?
Are most of them handled either by pm.c before calling a callback or are
specific to the in-kernel PM?
(...)
> +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = {
> + .get_local_id = __bpf_mptcp_pm_get_local_id,
> + .get_priority = __bpf_mptcp_pm_get_priority,
> + .established = __bpf_mptcp_pm_established,
> + .subflow_established = __bpf_mptcp_pm_subflow_established,
> + .allow_new_subflow = __bpf_mptcp_pm_allow_new_subflow,
> + .accept_new_subflow = __bpf_mptcp_pm_accept_new_subflow,
There is a mix of spaces and tabs here above. Only use tabs?
> + .add_addr_echo = __bpf_mptcp_pm_add_addr_echo,
> + .add_addr_received = __bpf_mptcp_pm_add_addr_received,
> + .rm_addr_received = __bpf_mptcp_pm_rm_addr_received,
> + .init = __bpf_mptcp_pm_init,
> + .release = __bpf_mptcp_pm_release,
> +};
(...)
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [PATCH mptcp-next v1 1/4] bpf: Add mptcp path manager struct_ops
2025-03-21 1:49 ` [PATCH mptcp-next v1 1/4] bpf: Add mptcp path manager struct_ops Geliang Tang
2025-03-21 10:59 ` Matthieu Baerts
@ 2025-03-24 10:26 ` Matthieu Baerts
2025-03-24 10:43 ` Geliang Tang
1 sibling, 1 reply; 15+ messages in thread
From: Matthieu Baerts @ 2025-03-24 10:26 UTC (permalink / raw)
To: Geliang Tang, mptcp; +Cc: Geliang Tang
Hi Geliang,
On 21/03/2025 02:49, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
>
> This patch implements a new struct bpf_struct_ops for MPTCP BPF path
> manager: bpf_mptcp_pm_ops. Register and unregister the bpf path manager
> in .reg and .unreg.
>
> Add write access for some fields of struct mptcp_sock and struct
> mptcp_pm_addr_entry in .btf_struct_access.
>
> This MPTCP BPF path manager implementation is similar to BPF TCP CC. And
> net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch.
>
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
> net/mptcp/bpf.c | 259 +++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 258 insertions(+), 1 deletion(-)
>
> diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
> index 2b0cfb57df8c..596574102b89 100644
> --- a/net/mptcp/bpf.c
> +++ b/net/mptcp/bpf.c
(...)
> +static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk,
> + struct mptcp_pm_addr_entry *skc)
> +{
> + return 0;
> +}
> +
> +static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock *msk,
> + struct mptcp_addr_info *skc)
> +{
> + return false;
> +}
> +
> +static void __bpf_mptcp_pm_established(struct mptcp_sock *msk)
> +{
> +}
> +
> +static void __bpf_mptcp_pm_subflow_established(struct mptcp_sock *msk)
> +{
> +}
> +
> +static bool __bpf_mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
> +{
> + return false;
> +}
> +
> +static bool __bpf_mptcp_pm_accept_new_subflow(const struct mptcp_sock *msk)
> +{
> + return false;
> +}
> +
> +static bool __bpf_mptcp_pm_add_addr_echo(struct mptcp_sock *msk,
> + const struct mptcp_addr_info *addr)
> +{
> + return false;
> +}
> +
> +static int __bpf_mptcp_pm_add_addr_received(struct mptcp_sock *msk,
> + const struct mptcp_addr_info *addr)
> +{
> + return 0;
> +}
> +
> +static void __bpf_mptcp_pm_rm_addr_received(struct mptcp_sock *msk)
> +{
> +}
> +
> +static void __bpf_mptcp_pm_init(struct mptcp_sock *msk)
> +{
> +}
> +
> +static void __bpf_mptcp_pm_release(struct mptcp_sock *msk)
> +{
> +}
> +
> +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = {
> + .get_local_id = __bpf_mptcp_pm_get_local_id,
> + .get_priority = __bpf_mptcp_pm_get_priority,
> + .established = __bpf_mptcp_pm_established,
> + .subflow_established = __bpf_mptcp_pm_subflow_established,
> + .allow_new_subflow = __bpf_mptcp_pm_allow_new_subflow,
> + .accept_new_subflow = __bpf_mptcp_pm_accept_new_subflow,
> + .add_addr_echo = __bpf_mptcp_pm_add_addr_echo,
> + .add_addr_received = __bpf_mptcp_pm_add_addr_received,
> + .rm_addr_received = __bpf_mptcp_pm_rm_addr_received,
Out of curiosity: I see here that even the optional hooks are assigned:
does it mean that all function pointers will never be NULL and checks
like 'pm->ops->add_addr_received' will always be true with a BPF PM? Or
is it still OK to assign them to NULL for a new BPF PM?
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [PATCH mptcp-next v1 1/4] bpf: Add mptcp path manager struct_ops
2025-03-24 10:26 ` Matthieu Baerts
@ 2025-03-24 10:43 ` Geliang Tang
2025-03-24 11:06 ` Matthieu Baerts
0 siblings, 1 reply; 15+ messages in thread
From: Geliang Tang @ 2025-03-24 10:43 UTC (permalink / raw)
To: Matthieu Baerts, mptcp; +Cc: Geliang Tang
On Mon, 2025-03-24 at 11:26 +0100, Matthieu Baerts wrote:
> Hi Geliang,
>
> On 21/03/2025 02:49, Geliang Tang wrote:
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> >
> > This patch implements a new struct bpf_struct_ops for MPTCP BPF
> > path
> > manager: bpf_mptcp_pm_ops. Register and unregister the bpf path
> > manager
> > in .reg and .unreg.
> >
> > Add write access for some fields of struct mptcp_sock and struct
> > mptcp_pm_addr_entry in .btf_struct_access.
> >
> > This MPTCP BPF path manager implementation is similar to BPF TCP
> > CC. And
> > net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch.
> >
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> > net/mptcp/bpf.c | 259
> > +++++++++++++++++++++++++++++++++++++++++++++++-
> > 1 file changed, 258 insertions(+), 1 deletion(-)
> >
> > diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
> > index 2b0cfb57df8c..596574102b89 100644
> > --- a/net/mptcp/bpf.c
> > +++ b/net/mptcp/bpf.c
>
> (...)
>
> > +static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk,
> > + struct mptcp_pm_addr_entry
> > *skc)
> > +{
> > + return 0;
> > +}
> > +
> > +static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock *msk,
> > + struct mptcp_addr_info
> > *skc)
> > +{
> > + return false;
> > +}
> > +
> > +static void __bpf_mptcp_pm_established(struct mptcp_sock *msk)
> > +{
> > +}
> > +
> > +static void __bpf_mptcp_pm_subflow_established(struct mptcp_sock
> > *msk)
> > +{
> > +}
> > +
> > +static bool __bpf_mptcp_pm_allow_new_subflow(struct mptcp_sock
> > *msk)
> > +{
> > + return false;
> > +}
> > +
> > +static bool __bpf_mptcp_pm_accept_new_subflow(const struct
> > mptcp_sock *msk)
> > +{
> > + return false;
> > +}
> > +
> > +static bool __bpf_mptcp_pm_add_addr_echo(struct mptcp_sock *msk,
> > + const struct
> > mptcp_addr_info *addr)
> > +{
> > + return false;
> > +}
> > +
> > +static int __bpf_mptcp_pm_add_addr_received(struct mptcp_sock
> > *msk,
> > + const struct
> > mptcp_addr_info *addr)
> > +{
> > + return 0;
> > +}
> > +
> > +static void __bpf_mptcp_pm_rm_addr_received(struct mptcp_sock
> > *msk)
> > +{
> > +}
> > +
> > +static void __bpf_mptcp_pm_init(struct mptcp_sock *msk)
> > +{
> > +}
> > +
> > +static void __bpf_mptcp_pm_release(struct mptcp_sock *msk)
> > +{
> > +}
> > +
> > +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = {
> > + .get_local_id = __bpf_mptcp_pm_get_local_id,
> > + .get_priority = __bpf_mptcp_pm_get_priority,
> > + .established = __bpf_mptcp_pm_established,
> > + .subflow_established =
> > __bpf_mptcp_pm_subflow_established,
> > + .allow_new_subflow =
> > __bpf_mptcp_pm_allow_new_subflow,
> > + .accept_new_subflow =
> > __bpf_mptcp_pm_accept_new_subflow,
> > + .add_addr_echo = __bpf_mptcp_pm_add_addr_echo,
> > + .add_addr_received =
> > __bpf_mptcp_pm_add_addr_received,
> > + .rm_addr_received = __bpf_mptcp_pm_rm_addr_received,
>
> Out of curiosity: I see here that even the optional hooks are
> assigned:
Optional hooks must be assigned here, otherwise this hook cannot be
defined in BPF.
> does it mean that all function pointers will never be NULL and checks
> like 'pm->ops->add_addr_received' will always be true with a BPF PM?
> Or
> is it still OK to assign them to NULL for a new BPF PM?
I think it's the latter, it's OK to assign them to NULL.
Thanks,
-Geliang
>
> Cheers,
> Matt
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [PATCH mptcp-next v1 1/4] bpf: Add mptcp path manager struct_ops
2025-03-24 10:43 ` Geliang Tang
@ 2025-03-24 11:06 ` Matthieu Baerts
2025-03-25 4:15 ` Geliang Tang
0 siblings, 1 reply; 15+ messages in thread
From: Matthieu Baerts @ 2025-03-24 11:06 UTC (permalink / raw)
To: Geliang Tang, mptcp; +Cc: Geliang Tang
Hi Geliang,
On 24/03/2025 11:43, Geliang Tang wrote:
> On Mon, 2025-03-24 at 11:26 +0100, Matthieu Baerts wrote:
>> Hi Geliang,
>>
>> On 21/03/2025 02:49, Geliang Tang wrote:
>>> From: Geliang Tang <tanggeliang@kylinos.cn>
>>>
>>> This patch implements a new struct bpf_struct_ops for MPTCP BPF
>>> path
>>> manager: bpf_mptcp_pm_ops. Register and unregister the bpf path
>>> manager
>>> in .reg and .unreg.
>>>
>>> Add write access for some fields of struct mptcp_sock and struct
>>> mptcp_pm_addr_entry in .btf_struct_access.
>>>
>>> This MPTCP BPF path manager implementation is similar to BPF TCP
>>> CC. And
>>> net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch.
>>>
>>> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
>>> ---
>>> net/mptcp/bpf.c | 259
>>> +++++++++++++++++++++++++++++++++++++++++++++++-
>>> 1 file changed, 258 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
>>> index 2b0cfb57df8c..596574102b89 100644
>>> --- a/net/mptcp/bpf.c
>>> +++ b/net/mptcp/bpf.c
>>
>> (...)
>>
>>> +static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk,
>>> + struct mptcp_pm_addr_entry
>>> *skc)
>>> +{
>>> + return 0;
>>> +}
>>> +
>>> +static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock *msk,
>>> + struct mptcp_addr_info
>>> *skc)
>>> +{
>>> + return false;
>>> +}
>>> +
>>> +static void __bpf_mptcp_pm_established(struct mptcp_sock *msk)
>>> +{
>>> +}
>>> +
>>> +static void __bpf_mptcp_pm_subflow_established(struct mptcp_sock
>>> *msk)
>>> +{
>>> +}
>>> +
>>> +static bool __bpf_mptcp_pm_allow_new_subflow(struct mptcp_sock
>>> *msk)
>>> +{
>>> + return false;
>>> +}
>>> +
>>> +static bool __bpf_mptcp_pm_accept_new_subflow(const struct
>>> mptcp_sock *msk)
>>> +{
>>> + return false;
>>> +}
>>> +
>>> +static bool __bpf_mptcp_pm_add_addr_echo(struct mptcp_sock *msk,
>>> + const struct
>>> mptcp_addr_info *addr)
>>> +{
>>> + return false;
>>> +}
>>> +
>>> +static int __bpf_mptcp_pm_add_addr_received(struct mptcp_sock
>>> *msk,
>>> + const struct
>>> mptcp_addr_info *addr)
>>> +{
>>> + return 0;
>>> +}
>>> +
>>> +static void __bpf_mptcp_pm_rm_addr_received(struct mptcp_sock
>>> *msk)
>>> +{
>>> +}
>>> +
>>> +static void __bpf_mptcp_pm_init(struct mptcp_sock *msk)
>>> +{
>>> +}
>>> +
>>> +static void __bpf_mptcp_pm_release(struct mptcp_sock *msk)
>>> +{
>>> +}
>>> +
>>> +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = {
>>> + .get_local_id = __bpf_mptcp_pm_get_local_id,
>>> + .get_priority = __bpf_mptcp_pm_get_priority,
>>> + .established = __bpf_mptcp_pm_established,
>>> + .subflow_established =
>>> __bpf_mptcp_pm_subflow_established,
>>> + .allow_new_subflow =
>>> __bpf_mptcp_pm_allow_new_subflow,
>>> + .accept_new_subflow =
>>> __bpf_mptcp_pm_accept_new_subflow,
>>> + .add_addr_echo = __bpf_mptcp_pm_add_addr_echo,
>>> + .add_addr_received =
>>> __bpf_mptcp_pm_add_addr_received,
>>> + .rm_addr_received = __bpf_mptcp_pm_rm_addr_received,
>>
>> Out of curiosity: I see here that even the optional hooks are
>> assigned:
>
> Optional hooks must be assigned here, otherwise this hook cannot be
> defined in BPF.
OK, thanks!
>> does it mean that all function pointers will never be NULL and checks
>> like 'pm->ops->add_addr_received' will always be true with a BPF PM?
>> Or
>> is it still OK to assign them to NULL for a new BPF PM?
>
> I think it's the latter, it's OK to assign them to NULL.
If you have the infrastructure ready, can you check if you can set
add_addr_received to NULL in a new BPF struct_ops mptcp_pm_ops for
example please? Also, just to be sure, can you also check that in this
case, in the pm.c, pm->ops->add_addr_received is also set to NULL and
not to __bpf_mptcp_pm_add_addr_received? (not urgent)
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [PATCH mptcp-next v1 1/4] bpf: Add mptcp path manager struct_ops
2025-03-24 11:06 ` Matthieu Baerts
@ 2025-03-25 4:15 ` Geliang Tang
2025-03-25 10:39 ` Matthieu Baerts
0 siblings, 1 reply; 15+ messages in thread
From: Geliang Tang @ 2025-03-25 4:15 UTC (permalink / raw)
To: Matthieu Baerts, mptcp; +Cc: Geliang Tang
On Mon, 2025-03-24 at 12:06 +0100, Matthieu Baerts wrote:
> Hi Geliang,
>
> On 24/03/2025 11:43, Geliang Tang wrote:
> > On Mon, 2025-03-24 at 11:26 +0100, Matthieu Baerts wrote:
> > > Hi Geliang,
> > >
> > > On 21/03/2025 02:49, Geliang Tang wrote:
> > > > From: Geliang Tang <tanggeliang@kylinos.cn>
> > > >
> > > > This patch implements a new struct bpf_struct_ops for MPTCP BPF
> > > > path
> > > > manager: bpf_mptcp_pm_ops. Register and unregister the bpf path
> > > > manager
> > > > in .reg and .unreg.
> > > >
> > > > Add write access for some fields of struct mptcp_sock and
> > > > struct
> > > > mptcp_pm_addr_entry in .btf_struct_access.
> > > >
> > > > This MPTCP BPF path manager implementation is similar to BPF
> > > > TCP
> > > > CC. And
> > > > net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch.
> > > >
> > > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > > > ---
> > > > net/mptcp/bpf.c | 259
> > > > +++++++++++++++++++++++++++++++++++++++++++++++-
> > > > 1 file changed, 258 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
> > > > index 2b0cfb57df8c..596574102b89 100644
> > > > --- a/net/mptcp/bpf.c
> > > > +++ b/net/mptcp/bpf.c
> > >
> > > (...)
> > >
> > > > +static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk,
> > > > + struct
> > > > mptcp_pm_addr_entry
> > > > *skc)
> > > > +{
> > > > + return 0;
> > > > +}
> > > > +
> > > > +static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock
> > > > *msk,
> > > > + struct mptcp_addr_info
> > > > *skc)
> > > > +{
> > > > + return false;
> > > > +}
> > > > +
> > > > +static void __bpf_mptcp_pm_established(struct mptcp_sock *msk)
> > > > +{
> > > > +}
> > > > +
> > > > +static void __bpf_mptcp_pm_subflow_established(struct
> > > > mptcp_sock
> > > > *msk)
> > > > +{
> > > > +}
> > > > +
> > > > +static bool __bpf_mptcp_pm_allow_new_subflow(struct mptcp_sock
> > > > *msk)
> > > > +{
> > > > + return false;
> > > > +}
> > > > +
> > > > +static bool __bpf_mptcp_pm_accept_new_subflow(const struct
> > > > mptcp_sock *msk)
> > > > +{
> > > > + return false;
> > > > +}
> > > > +
> > > > +static bool __bpf_mptcp_pm_add_addr_echo(struct mptcp_sock
> > > > *msk,
> > > > + const struct
> > > > mptcp_addr_info *addr)
> > > > +{
> > > > + return false;
> > > > +}
> > > > +
> > > > +static int __bpf_mptcp_pm_add_addr_received(struct mptcp_sock
> > > > *msk,
> > > > + const struct
> > > > mptcp_addr_info *addr)
> > > > +{
> > > > + return 0;
> > > > +}
> > > > +
> > > > +static void __bpf_mptcp_pm_rm_addr_received(struct mptcp_sock
> > > > *msk)
> > > > +{
> > > > +}
> > > > +
> > > > +static void __bpf_mptcp_pm_init(struct mptcp_sock *msk)
> > > > +{
> > > > +}
> > > > +
> > > > +static void __bpf_mptcp_pm_release(struct mptcp_sock *msk)
> > > > +{
> > > > +}
> > > > +
> > > > +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = {
> > > > + .get_local_id = __bpf_mptcp_pm_get_local_id,
> > > > + .get_priority = __bpf_mptcp_pm_get_priority,
> > > > + .established = __bpf_mptcp_pm_established,
> > > > + .subflow_established =
> > > > __bpf_mptcp_pm_subflow_established,
> > > > + .allow_new_subflow =
> > > > __bpf_mptcp_pm_allow_new_subflow,
> > > > + .accept_new_subflow =
> > > > __bpf_mptcp_pm_accept_new_subflow,
> > > > + .add_addr_echo =
> > > > __bpf_mptcp_pm_add_addr_echo,
> > > > + .add_addr_received =
> > > > __bpf_mptcp_pm_add_addr_received,
> > > > + .rm_addr_received =
> > > > __bpf_mptcp_pm_rm_addr_received,
> > >
> > > Out of curiosity: I see here that even the optional hooks are
> > > assigned:
> >
> > Optional hooks must be assigned here, otherwise this hook cannot be
> > defined in BPF.
>
> OK, thanks!
>
> > > does it mean that all function pointers will never be NULL and
> > > checks
> > > like 'pm->ops->add_addr_received' will always be true with a BPF
> > > PM?
> > > Or
> > > is it still OK to assign them to NULL for a new BPF PM?
> >
> > I think it's the latter, it's OK to assign them to NULL.
>
> If you have the infrastructure ready, can you check if you can set
> add_addr_received to NULL in a new BPF struct_ops mptcp_pm_ops for
> example please? Also, just to be sure, can you also check that in
> this
> case, in the pm.c, pm->ops->add_addr_received is also set to NULL and
> not to __bpf_mptcp_pm_add_addr_received? (not urgent)
Sure, here's the test:
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index f9fed096d77c..6bdca0dcf21e 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -578,6 +578,9 @@ void mptcp_pm_add_addr_received(const struct sock
*ssk,
pr_debug("msk=%p remote_id=%d accept=%d\n", msk, addr->id,
READ_ONCE(pm->accept_addr));
+ pr_info("%s name=%s, pm->ops->add_addr_received=%p\n",
+ __func__, pm->ops->name, pm->ops->add_addr_received);
+
mptcp_event_addr_announced(ssk, addr);
spin_lock_bh(&pm->lock);
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c
b/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c
index 2f8e0e85b5d7..8aa4b8c9ce33 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c
@@ -265,4 +265,5 @@ struct mptcp_pm_ops bpf_userspace = {
.init = (void *)mptcp_pm_userspace_init,
.release = (void *)mptcp_pm_userspace_release,
.name = "bpf_userspace",
+ .add_addr_received = (void *)NULL,
};
And the output:
[ 18.229067][ C0] MPTCP: mptcp_pm_add_addr_received name=kernel,
pm->ops->add_addr_received=00000000cd865d66
[ 18.231316][ C0] MPTCP: mptcp_pm_add_addr_received name=kernel,
pm->ops->add_addr_received=00000000cd865d66
[ 21.105658][ C0] MPTCP: mptcp_pm_add_addr_received
name=bpf_netlink, pm->ops->add_addr_received=00000000fe7b7426
[ 21.106419][ C0] MPTCP: mptcp_pm_add_addr_received
name=bpf_netlink, pm->ops->add_addr_received=00000000fe7b7426
[ 24.767318][ C0] MPTCP: mptcp_pm_add_addr_received
name=userspace, pm->ops->add_addr_received=0000000000000000
[ 28.220824][ C0] MPTCP: mptcp_pm_add_addr_received
name=bpf_userspace, pm->ops->add_addr_received=0000000000000000
[ 36.623859][ C0] MPTCP: mptcp_pm_add_addr_received
name=bpf_hashmap, pm->ops->add_addr_received=0000000000000000
# #187/1 mptcp/connect:OK
# #187/2 mptcp/base:OK
# #187/3 mptcp/mptcpify:OK
# #187/4 mptcp/subflow:OK
# #187/5 mptcp/iters_subflow:OK
# #187/6 mptcp/netlink_pm:OK
# #187/7 mptcp/bpf_netlink_pm:OK
# #187/8 mptcp/userspace_pm:OK
# #187/9 mptcp/bpf_userspace_pm:OK
# #187/10 mptcp/iters_netlink_address:OK
# #187/11 mptcp/iters_userspace_address:OK
# #187/12 mptcp/bpf_hashmap_pm:OK
# #187/13 mptcp/sockopt:OK
# #187/14 mptcp/default:OK
# #187/15 mptcp/first:OK
# #187/16 mptcp/bkup:OK
# #187/17 mptcp/rr:OK
# #187/18 mptcp/red:OK
# #187/19 mptcp/burst:OK
# #187/20 mptcp/stale:OK
# #187 mptcp:OK
pm->ops->add_addr_received is set to NULL indeed, whether we use
".add_addr_received = (void *)NULL," so that it is explicitly set to
NULL, or simply do not assign a new function to it but assign other
function pointers.
Thanks,
-Geliang
>
> Cheers,
> Matt
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: [PATCH mptcp-next v1 1/4] bpf: Add mptcp path manager struct_ops
2025-03-25 4:15 ` Geliang Tang
@ 2025-03-25 10:39 ` Matthieu Baerts
0 siblings, 0 replies; 15+ messages in thread
From: Matthieu Baerts @ 2025-03-25 10:39 UTC (permalink / raw)
To: Geliang Tang, mptcp; +Cc: Geliang Tang
Hi Geliang,
On 25/03/2025 05:15, Geliang Tang wrote:
> On Mon, 2025-03-24 at 12:06 +0100, Matthieu Baerts wrote:
>> On 24/03/2025 11:43, Geliang Tang wrote:
>>> On Mon, 2025-03-24 at 11:26 +0100, Matthieu Baerts wrote:
(...)
>>>> does it mean that all function pointers will never be NULL and
>>>> checks
>>>> like 'pm->ops->add_addr_received' will always be true with a BPF
>>>> PM?
>>>> Or
>>>> is it still OK to assign them to NULL for a new BPF PM?
>>>
>>> I think it's the latter, it's OK to assign them to NULL.
>>
>> If you have the infrastructure ready, can you check if you can set
>> add_addr_received to NULL in a new BPF struct_ops mptcp_pm_ops for
>> example please? Also, just to be sure, can you also check that in
>> this
>> case, in the pm.c, pm->ops->add_addr_received is also set to NULL and
>> not to __bpf_mptcp_pm_add_addr_received? (not urgent)
>
> Sure, here's the test:
(...)
> pm->ops->add_addr_received is set to NULL indeed, whether we use
> ".add_addr_received = (void *)NULL," so that it is explicitly set to
> NULL, or simply do not assign a new function to it but assign other
> function pointers.
Good, thank you for having checked! So we can avoid worker operations
(PM), and keeping the MPTCP retransmission callback optional (sched).
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH mptcp-next v1 2/4] bpf: Export mptcp path manager kfuncs
2025-03-21 1:49 [PATCH mptcp-next v1 0/4] BPF path manager, part 7 Geliang Tang
2025-03-21 1:49 ` [PATCH mptcp-next v1 1/4] bpf: Add mptcp path manager struct_ops Geliang Tang
@ 2025-03-21 1:49 ` Geliang Tang
2025-03-21 11:11 ` Matthieu Baerts
2025-03-21 1:49 ` [PATCH mptcp-next v1 3/4] selftests/bpf: Add mptcp netlink pm subtest Geliang Tang
` (3 subsequent siblings)
5 siblings, 1 reply; 15+ messages in thread
From: Geliang Tang @ 2025-03-21 1:49 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
From: Geliang Tang <tanggeliang@kylinos.cn>
This patch exports mptcp path manager helpers into BPF, adds these
kfunc names into mptcp common kfunc_set.
bpf_kmemdup_entry() and bpf_kfree_entry() are wrappers of kmemdup() and
kfree(), using to alloc and free an mptcp address entry.
bpf_set_bit() and bpf_bitmap_fill() are wrappers of __set_bit() and
bitmap_fill(), using for mptcp address ID bitmap.
bpf_spin_lock_bh() and bpf_spin_unlock_bh() are wrappers of spin_lock_bh()
and spin_unlock_bh(), using to lock and unlock the mptcp pm lock.
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
net/mptcp/bpf.c | 48 +++++++++++++++++++++++++++++++++++++++++++
net/mptcp/pm_kernel.c | 27 ++++++++++++++++++++++++
2 files changed, 75 insertions(+)
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index 596574102b89..e411ae8382f2 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -540,6 +540,38 @@ bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it)
{
}
+__bpf_kfunc static struct mptcp_pm_addr_entry *
+bpf_kmemdup_entry(struct mptcp_pm_addr_entry *entry, int size, gfp_t priority)
+{
+ return kmemdup(entry, size, priority);
+}
+
+__bpf_kfunc static void
+bpf_kfree_entry(struct mptcp_pm_addr_entry *entry)
+{
+ kfree(entry);
+}
+
+__bpf_kfunc static void bpf_set_bit(unsigned long nr, unsigned long *addr__ign)
+{
+ __set_bit(nr, addr__ign);
+}
+
+__bpf_kfunc static void bpf_bitmap_fill(unsigned long *dst__ign, unsigned int nbits)
+{
+ bitmap_fill(dst__ign, nbits);
+}
+
+__bpf_kfunc static void bpf_spin_lock_bh(spinlock_t *lock)
+{
+ spin_lock_bh(lock);
+}
+
+__bpf_kfunc static void bpf_spin_unlock_bh(spinlock_t *lock)
+{
+ spin_unlock_bh(lock);
+}
+
__bpf_kfunc static bool bpf_mptcp_subflow_queues_empty(struct sock *sk)
{
return tcp_rtx_queue_empty(sk);
@@ -564,6 +596,22 @@ BTF_ID_FLAGS(func, bpf_mptcp_subflow_tcp_sock, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy, KF_ITER_DESTROY)
+BTF_ID_FLAGS(func, bpf_kmemdup_entry)
+BTF_ID_FLAGS(func, bpf_kfree_entry)
+BTF_ID_FLAGS(func, bpf_set_bit)
+BTF_ID_FLAGS(func, bpf_bitmap_fill)
+BTF_ID_FLAGS(func, bpf_spin_lock_bh)
+BTF_ID_FLAGS(func, bpf_spin_unlock_bh)
+BTF_ID_FLAGS(func, mptcp_pm_nl_lookup_addr)
+BTF_ID_FLAGS(func, mptcp_pm_nl_append_new_local_addr_msk)
+BTF_ID_FLAGS(func, mptcp_pm_get_add_addr_signal_max)
+BTF_ID_FLAGS(func, mptcp_pm_get_add_addr_accept_max)
+BTF_ID_FLAGS(func, mptcp_pm_get_subflows_max)
+BTF_ID_FLAGS(func, mptcp_pm_get_local_addr_max)
+BTF_ID_FLAGS(func, mptcp_pm_add_addr_recv)
+BTF_ID_FLAGS(func, mptcp_pm_is_init_remote_addr)
+BTF_ID_FLAGS(func, mptcp_pm_create_subflow_or_signal_addr)
+BTF_ID_FLAGS(func, mptcp_pm_rm_addr_recv)
BTF_ID_FLAGS(func, mptcp_subflow_set_scheduled)
BTF_ID_FLAGS(func, mptcp_subflow_active)
BTF_ID_FLAGS(func, mptcp_set_timeout)
diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c
index 4f7b2e0e998d..3cf81986c70d 100644
--- a/net/mptcp/pm_kernel.c
+++ b/net/mptcp/pm_kernel.c
@@ -253,6 +253,9 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)
return NULL;
}
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
@@ -367,6 +370,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
mptcp_pm_nl_check_work_pending(msk);
}
+__bpf_kfunc_end_defs();
+
static void mptcp_pm_kernel_established(struct mptcp_sock *msk)
{
spin_lock_bh(&msk->pm.lock);
@@ -1493,3 +1498,25 @@ void __init mptcp_pm_kernel_register(void)
mptcp_pm_register(&mptcp_pm_kernel);
}
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc static struct mptcp_pm_addr_entry *
+mptcp_pm_nl_lookup_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *info)
+{
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
+
+ return __lookup_addr(pernet, info);
+}
+
+__bpf_kfunc static int
+mptcp_pm_nl_append_new_local_addr_msk(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *entry,
+ bool needs_id, bool replace)
+{
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
+
+ return mptcp_pm_nl_append_new_local_addr(pernet, entry, needs_id, replace);
+}
+
+__bpf_kfunc_end_defs();
--
2.43.0
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: [PATCH mptcp-next v1 2/4] bpf: Export mptcp path manager kfuncs
2025-03-21 1:49 ` [PATCH mptcp-next v1 2/4] bpf: Export mptcp path manager kfuncs Geliang Tang
@ 2025-03-21 11:11 ` Matthieu Baerts
0 siblings, 0 replies; 15+ messages in thread
From: Matthieu Baerts @ 2025-03-21 11:11 UTC (permalink / raw)
To: Geliang Tang, mptcp; +Cc: Geliang Tang
Hi Geliang,
On 21/03/2025 02:49, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
>
> This patch exports mptcp path manager helpers into BPF, adds these
> kfunc names into mptcp common kfunc_set.
>
> bpf_kmemdup_entry() and bpf_kfree_entry() are wrappers of kmemdup() and
> kfree(), using to alloc and free an mptcp address entry.
That feels really wrong: a BPF program cannot crash or cause problems
(deadlock, memleaks, etc.), it should then not be able to reserve memory
except in a map or something that can be automatically freed when the
bpf program is removed.
> bpf_set_bit() and bpf_bitmap_fill() are wrappers of __set_bit() and
> bitmap_fill(), using for mptcp address ID bitmap.
Is it really needed? Are there not already some it not store stuff in a
BPF map?
> bpf_spin_lock_bh() and bpf_spin_unlock_bh() are wrappers of spin_lock_bh()
> and spin_unlock_bh(), using to lock and unlock the mptcp pm lock.
Same here, a BPF should not be able to lock something and never unlock
it. I think there are some stuff in BPF to get a lock automatically.
(Polymorphisme?)
(...)
> @@ -564,6 +596,22 @@ BTF_ID_FLAGS(func, bpf_mptcp_subflow_tcp_sock, KF_RET_NULL)
> BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
> BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next, KF_ITER_NEXT | KF_RET_NULL)
> BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy, KF_ITER_DESTROY)
> +BTF_ID_FLAGS(func, bpf_kmemdup_entry)
> +BTF_ID_FLAGS(func, bpf_kfree_entry)
> +BTF_ID_FLAGS(func, bpf_set_bit)
> +BTF_ID_FLAGS(func, bpf_bitmap_fill)
> +BTF_ID_FLAGS(func, bpf_spin_lock_bh)
> +BTF_ID_FLAGS(func, bpf_spin_unlock_bh)
This should not be needed, a BPF program should not need them. Not sure
about the bitmap, but for the rest, something else should be used
(maps?) or automatically done (locks).
> +BTF_ID_FLAGS(func, mptcp_pm_nl_lookup_addr)
> +BTF_ID_FLAGS(func, mptcp_pm_nl_append_new_local_addr_msk)
That's specific to the in-kernel PM, that feels wrong. Addresses should
be stored in BPF maps or similar instead I guess.
> +BTF_ID_FLAGS(func, mptcp_pm_get_add_addr_signal_max)
> +BTF_ID_FLAGS(func, mptcp_pm_get_add_addr_accept_max)
> +BTF_ID_FLAGS(func, mptcp_pm_get_subflows_max)
> +BTF_ID_FLAGS(func, mptcp_pm_get_local_addr_max)
That feels wrong, it should not be needed, that's specific to the
in-kernel PM as well and set via netlink.
> +BTF_ID_FLAGS(func, mptcp_pm_add_addr_recv)
Should not be needed, see my comments in part 6.
> +BTF_ID_FLAGS(func, mptcp_pm_is_init_remote_addr)
Should it not be handled by the core (pm.c) or specific to in-kernel? (I
didn't check)
> +BTF_ID_FLAGS(func, mptcp_pm_create_subflow_or_signal_addr)
Specific to the in-kernel PM, that feels wrong.
> +BTF_ID_FLAGS(func, mptcp_pm_rm_addr_recv)
Maybe not needed, see my comments in part 6.
(...)
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH mptcp-next v1 3/4] selftests/bpf: Add mptcp netlink pm subtest
2025-03-21 1:49 [PATCH mptcp-next v1 0/4] BPF path manager, part 7 Geliang Tang
2025-03-21 1:49 ` [PATCH mptcp-next v1 1/4] bpf: Add mptcp path manager struct_ops Geliang Tang
2025-03-21 1:49 ` [PATCH mptcp-next v1 2/4] bpf: Export mptcp path manager kfuncs Geliang Tang
@ 2025-03-21 1:49 ` Geliang Tang
2025-03-21 11:21 ` Matthieu Baerts
2025-03-21 1:49 ` [PATCH mptcp-next v1 4/4] selftests/bpf: Add mptcp bpf_netlink " Geliang Tang
` (2 subsequent siblings)
5 siblings, 1 reply; 15+ messages in thread
From: Geliang Tang @ 2025-03-21 1:49 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
From: Geliang Tang <tanggeliang@kylinos.cn>
To verify that the behavior of BPF path manager is the same as that of
netlink pm in the kernel, a netlink pm self-test has been added. BPF
path manager in the next commit will also use this test too.
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
.../testing/selftests/bpf/prog_tests/mptcp.c | 236 ++++++++++++++++++
1 file changed, 236 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 7c51250e7161..5303cbf38a44 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -56,6 +56,12 @@
#endif
#define MPTCP_SCHED_NAME_MAX 16
+enum mptcp_pm_family {
+ IPV4 = 0,
+ IPV4MAPPED,
+ IPV6,
+};
+
static const unsigned int total_bytes = 10 * 1024 * 1024;
static int duration;
@@ -562,6 +568,234 @@ static void test_iters_subflow(void)
close(cgroup_fd);
}
+static int recv_byte(int fd)
+{
+ char buf[1];
+ ssize_t n;
+
+ n = recv(fd, buf, sizeof(buf), 0);
+ if (CHECK(n <= 0, "recv_byte", "recv")) {
+ log_err("failed/partial recv");
+ return -1;
+ }
+ return 0;
+}
+
+static int netlink_pm_add_subflow(char *addr, __u8 id)
+{
+ return SYS_NOFAIL("ip -n %s mptcp endpoint add %s subflow id %u",
+ NS_TEST, addr, id);
+}
+
+static int netlink_pm_rm_subflow(__u8 id)
+{
+ return SYS_NOFAIL("ip -n %s mptcp endpoint delete id %u",
+ NS_TEST, id);
+}
+
+static int netlink_pm_add_addr(char *addr, __u8 id)
+{
+ return SYS_NOFAIL("ip -n %s mptcp endpoint add %s signal id %u",
+ NS_TEST, addr, id);
+}
+
+static int netlink_pm_rm_addr(__u8 id)
+{
+ return SYS_NOFAIL("ip -n %s mptcp endpoint delete id %u",
+ NS_TEST, id);
+}
+
+static int netlink_pm_rm_addr_id_0(char *addr)
+{
+ return SYS_NOFAIL("ip -n %s mptcp endpoint delete id 0 %s",
+ NS_TEST, addr);
+}
+
+static int netlink_pm_set_flags(__u8 id, char *flags)
+{
+ return SYS_NOFAIL("ip -n %s mptcp endpoint change id %u %s",
+ NS_TEST, id, flags);
+}
+
+static int netlink_pm_get_addr(__u8 id, char *output)
+{
+ char cmd[1024];
+ FILE *fp;
+
+ sprintf(cmd, "ip -n %s mptcp endpoint show id %u", NS_TEST, id);
+ fp = popen(cmd, "r");
+ if (!fp)
+ return -1;
+
+ bzero(output, BUFSIZ);
+ fread(output, 1, BUFSIZ, fp);
+ pclose(fp);
+
+ return 0;
+}
+
+static int netlink_pm_dump_addr(char *output)
+{
+ char cmd[1024];
+ FILE *fp;
+
+ sprintf(cmd, "ip -n %s mptcp endpoint show", NS_TEST);
+ fp = popen(cmd, "r");
+ if (!fp)
+ return -1;
+
+ bzero(output, BUFSIZ);
+ fread(output, 1, BUFSIZ, fp);
+ pclose(fp);
+
+ return 0;
+}
+
+static void run_netlink_pm(enum mptcp_pm_family family)
+{
+ bool ipv4mapped = (family == IPV4MAPPED);
+ bool ipv6 = (family == IPV6 || ipv4mapped);
+ int server_fd, client_fd, accept_fd;
+ char output[BUFSIZ], expect[1024];
+ char *addr;
+ int err;
+
+ addr = ipv6 ? (ipv4mapped ? "::ffff:"ADDR_1 : ADDR6_1) : ADDR_1;
+ server_fd = start_mptcp_server(ipv6 ? AF_INET6 : AF_INET, addr, PORT_1, 0);
+ if (!ASSERT_OK_FD(server_fd, "start_mptcp_server"))
+ return;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_OK_FD(client_fd, "connect_to_fd"))
+ goto close_server;
+
+ accept_fd = accept(server_fd, NULL, NULL);
+ if (!ASSERT_OK_FD(accept_fd, "accept"))
+ goto close_client;
+
+ usleep(200000); /* 0.2s */
+ send_byte(client_fd);
+ recv_byte(accept_fd);
+ usleep(200000); /* 0.2s */
+
+ addr = ipv6 ? (ipv4mapped ? "::ffff:"ADDR_2 : ADDR6_2) : ADDR_2;
+ err = netlink_pm_add_subflow(addr, 100);
+ if (!ASSERT_OK(err, "netlink_pm_add_subflow 100"))
+ goto close_accept;
+
+ send_byte(accept_fd);
+ recv_byte(client_fd);
+
+ sprintf(expect, "%s id 100 subflow \n", addr);
+ err = netlink_pm_get_addr(100, output);
+ if (!ASSERT_OK(err, "netlink_pm_get_addr 100") ||
+ !ASSERT_STRNEQ(output, expect, sizeof(expect), "get_addr"))
+ goto close_accept;
+
+ err = netlink_pm_set_flags(100, "backup");
+ if (!ASSERT_OK(err, "netlink_pm_set_flags backup"))
+ goto close_accept;
+
+ send_byte(client_fd);
+ recv_byte(accept_fd);
+
+ sprintf(expect, "%s id 100 subflow backup \n", addr);
+ err = netlink_pm_get_addr(100, output);
+ if (!ASSERT_OK(err, "netlink_pm_get_addr 100") ||
+ !ASSERT_STRNEQ(output, expect, sizeof(expect), "get_addr"))
+ goto close_accept;
+
+ err = netlink_pm_set_flags(100, "nobackup");
+ if (!ASSERT_OK(err, "netlink_pm_set_flags nobackup"))
+ goto close_accept;
+
+ send_byte(accept_fd);
+ recv_byte(client_fd);
+
+ sprintf(expect, "%s id 100 subflow \n", addr);
+ err = netlink_pm_get_addr(100, output);
+ if (!ASSERT_OK(err, "netlink_pm_get_addr 100") ||
+ !ASSERT_STRNEQ(output, expect, sizeof(expect), "get_addr"))
+ goto close_accept;
+
+ err = netlink_pm_rm_subflow(100);
+ if (!ASSERT_OK(err, "netlink_pm_rm_subflow 100"))
+ goto close_accept;
+
+ send_byte(client_fd);
+ recv_byte(accept_fd);
+
+ err = netlink_pm_dump_addr(output);
+ if (!ASSERT_OK(err, "netlink_pm_dump_addr") ||
+ !ASSERT_STRNEQ(output, "", sizeof(output), "dump_addr"))
+ goto close_accept;
+
+ addr = ipv6 ? (ipv4mapped ? "::ffff:"ADDR_3 : ADDR6_3) : ADDR_3;
+ err = netlink_pm_add_addr(addr, 200);
+ if (!ASSERT_OK(err, "netlink_pm_add_addr 200"))
+ goto close_accept;
+
+ send_byte(accept_fd);
+ recv_byte(client_fd);
+
+ sprintf(expect, "%s id 200 signal \n", addr);
+ err = netlink_pm_dump_addr(output);
+ if (!ASSERT_OK(err, "netlink_pm_dump_addr") ||
+ !ASSERT_STRNEQ(output, expect, sizeof(expect), "dump_addr"))
+ goto close_accept;
+
+ err = netlink_pm_rm_addr(200);
+ if (!ASSERT_OK(err, "netlink_pm_rm_addr 200"))
+ goto close_accept;
+
+ send_byte(client_fd);
+ recv_byte(accept_fd);
+
+ err = netlink_pm_rm_addr_id_0(addr);
+ ASSERT_OK(err, "netlink_pm_rm_addr 0");
+
+close_accept:
+ close(accept_fd);
+close_client:
+ close(client_fd);
+close_server:
+ close(server_fd);
+}
+
+static int pm_init(const char *pm_name)
+{
+ if (address_init())
+ goto fail;
+
+ SYS(fail, "ip netns exec %s sysctl -qw net.mptcp.path_manager=%s",
+ NS_TEST, pm_name);
+ SYS(fail, "ip -n %s mptcp limits set add_addr_accepted 4 subflows 4",
+ NS_TEST);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void test_netlink_pm(void)
+{
+ struct netns_obj *netns;
+ int err;
+
+ netns = netns_new(NS_TEST, true);
+ if (!ASSERT_OK_PTR(netns, "netns_new"))
+ return;
+
+ err = pm_init("kernel");
+ if (!ASSERT_OK(err, "pm_init: netlink pm"))
+ goto fail;
+
+ run_netlink_pm(IPV4MAPPED);
+
+fail:
+ netns_free(netns);
+}
+
static int sched_init(char *flags, char *sched)
{
if (endpoint_init(flags, 2) < 0)
@@ -756,6 +990,8 @@ void test_mptcp(void)
test_subflow();
if (test__start_subtest("iters_subflow"))
test_iters_subflow();
+ if (test__start_subtest("netlink_pm"))
+ test_netlink_pm();
if (test__start_subtest("default"))
test_default();
if (test__start_subtest("first"))
--
2.43.0
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: [PATCH mptcp-next v1 3/4] selftests/bpf: Add mptcp netlink pm subtest
2025-03-21 1:49 ` [PATCH mptcp-next v1 3/4] selftests/bpf: Add mptcp netlink pm subtest Geliang Tang
@ 2025-03-21 11:21 ` Matthieu Baerts
0 siblings, 0 replies; 15+ messages in thread
From: Matthieu Baerts @ 2025-03-21 11:21 UTC (permalink / raw)
To: Geliang Tang, mptcp; +Cc: Geliang Tang
Hi Geliang,
On 21/03/2025 02:49, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
>
> To verify that the behavior of BPF path manager is the same as that of
> netlink pm in the kernel, a netlink pm self-test has been added. BPF
> path manager in the next commit will also use this test too.
I didn't check the modifications yet, but that feels complex and long.
To me, it is a good idea to create an BPF PM imitating the in-kernel PM,
but: no netlink should be involved: limits, endpoints, dump, etc.
Instead, the configurations should be done with BPF (hardcoded in the
program, maps, bss, etc.), which means probably duplicating and adapting
code, a bit similar to what you did with the BPF "burst" packet scheduler.
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH mptcp-next v1 4/4] selftests/bpf: Add mptcp bpf_netlink pm subtest
2025-03-21 1:49 [PATCH mptcp-next v1 0/4] BPF path manager, part 7 Geliang Tang
` (2 preceding siblings ...)
2025-03-21 1:49 ` [PATCH mptcp-next v1 3/4] selftests/bpf: Add mptcp netlink pm subtest Geliang Tang
@ 2025-03-21 1:49 ` Geliang Tang
2025-03-21 2:20 ` [PATCH mptcp-next v1 0/4] BPF path manager, part 7 MPTCP CI
2025-03-21 2:59 ` MPTCP CI
5 siblings, 0 replies; 15+ messages in thread
From: Geliang Tang @ 2025-03-21 1:49 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
From: Geliang Tang <tanggeliang@kylinos.cn>
This patch adds an mptcp bpf netlink pm example program, implements
all interfaces of struct mptcp_pm_ops using almost the same logic as
the netlink pm in kernel.
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
.../testing/selftests/bpf/prog_tests/mptcp.c | 48 +++++
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 27 +++
.../bpf/progs/mptcp_bpf_netlink_pm.c | 204 ++++++++++++++++++
.../selftests/bpf/progs/mptcp_bpf_pm.h | 52 +++++
4 files changed, 331 insertions(+)
create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_netlink_pm.c
create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_pm.h
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 5303cbf38a44..c0bc4cfb24d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -12,6 +12,7 @@
#include "mptcpify.skel.h"
#include "mptcp_subflow.skel.h"
#include "mptcp_bpf_iters.skel.h"
+#include "mptcp_bpf_netlink_pm.skel.h"
#include "mptcp_bpf_first.skel.h"
#include "mptcp_bpf_bkup.skel.h"
#include "mptcp_bpf_rr.skel.h"
@@ -796,6 +797,51 @@ static void test_netlink_pm(void)
netns_free(netns);
}
+static void test_bpf_netlink_pm(void)
+{
+ struct mptcp_bpf_netlink_pm *skel;
+ struct netns_obj *netns;
+ struct bpf_link *link;
+ int err;
+
+ skel = mptcp_bpf_netlink_pm__open();
+ if (!ASSERT_OK_PTR(skel, "open: bpf_netlink pm"))
+ return;
+
+ err = bpf_program__set_flags(skel->progs.mptcp_pm_netlink_established,
+ BPF_F_SLEEPABLE);
+ err = err ?: bpf_program__set_flags(skel->progs.mptcp_pm_netlink_subflow_established,
+ BPF_F_SLEEPABLE);
+ err = err ?: bpf_program__set_flags(skel->progs.mptcp_pm_netlink_rm_addr_received,
+ BPF_F_SLEEPABLE);
+ if (!ASSERT_OK(err, "set sleepable flags"))
+ goto skel_destroy;
+
+ if (!ASSERT_OK(mptcp_bpf_netlink_pm__load(skel), "load: bpf_netlink pm"))
+ goto skel_destroy;
+
+ link = bpf_map__attach_struct_ops(skel->maps.bpf_netlink);
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops: bpf_netlink pm"))
+ goto skel_destroy;
+
+ netns = netns_new(NS_TEST, true);
+ if (!ASSERT_OK_PTR(netns, "netns_new"))
+ goto link_destroy;
+
+ err = pm_init("bpf_netlink");
+ if (!ASSERT_OK(err, "pm_init: bpf_netlink pm"))
+ goto close_netns;
+
+ run_netlink_pm(skel->kconfig->CONFIG_MPTCP_IPV6 ? IPV6 : IPV4);
+
+close_netns:
+ netns_free(netns);
+link_destroy:
+ bpf_link__destroy(link);
+skel_destroy:
+ mptcp_bpf_netlink_pm__destroy(skel);
+}
+
static int sched_init(char *flags, char *sched)
{
if (endpoint_init(flags, 2) < 0)
@@ -992,6 +1038,8 @@ void test_mptcp(void)
test_iters_subflow();
if (test__start_subtest("netlink_pm"))
test_netlink_pm();
+ if (test__start_subtest("bpf_netlink_pm"))
+ test_bpf_netlink_pm();
if (test__start_subtest("default"))
test_default();
if (test__start_subtest("first"))
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
index 4e901941d5dd..0d5cf8426bc5 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
@@ -4,6 +4,9 @@
#include "bpf_experimental.h"
+#define READ_ONCE(x) (*(const volatile typeof(x) *)&(x))
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
+
/* list helpers from include/linux/list.h */
static inline int list_is_head(const struct list_head *list,
const struct list_head *head)
@@ -33,6 +36,24 @@ static inline int list_is_head(const struct list_head *list,
#define mptcp_for_each_subflow(__msk, __subflow) \
list_for_each_entry(__subflow, &((__msk)->conn_list), node)
+/* errno macros from include/uapi/asm-generic/errno-base.h */
+#define ESRCH 3 /* No such process */
+#define ENOMEM 12 /* Out of Memory */
+#define EINVAL 22 /* Invalid argument */
+
+/* GFP macros from include/linux/gfp_types.h */
+#define __AC(X,Y) (X##Y)
+#define _AC(X,Y) __AC(X,Y)
+#define _UL(x) (_AC(x, UL))
+#define UL(x) (_UL(x))
+#define BIT(nr) (UL(1) << (nr))
+
+#define ___GFP_HIGH BIT(___GFP_HIGH_BIT)
+#define __GFP_HIGH ((gfp_t)___GFP_HIGH)
+#define ___GFP_KSWAPD_RECLAIM BIT(___GFP_KSWAPD_RECLAIM_BIT)
+#define __GFP_KSWAPD_RECLAIM ((gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
+#define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM)
+
static __always_inline struct sock *
mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
{
@@ -40,6 +61,12 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
}
/* ksym */
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+extern void bpf_spin_lock_bh(spinlock_t *lock) __ksym;
+extern void bpf_spin_unlock_bh(spinlock_t *lock) __ksym;
+
extern struct mptcp_subflow_context *
bpf_mptcp_subflow_ctx(const struct sock *sk) __ksym;
extern struct sock *
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_netlink_pm.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_netlink_pm.c
new file mode 100644
index 000000000000..9a9a396bdf94
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_netlink_pm.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025, Kylin Software */
+
+#include "mptcp_bpf.h"
+#include "mptcp_bpf_pm.h"
+
+char _license[] SEC("license") = "GPL";
+
+extern bool CONFIG_MPTCP_IPV6 __kconfig __weak;
+
+extern unsigned int
+mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) __ksym;
+extern unsigned int
+mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk) __ksym;
+extern unsigned int
+mptcp_pm_get_subflows_max(const struct mptcp_sock *msk) __ksym;
+extern unsigned int
+mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk) __ksym;
+extern void bpf_bitmap_fill(unsigned long *dst__ign, unsigned int nbits) __ksym;
+
+extern bool mptcp_pm_is_init_remote_addr(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *remote) __ksym;
+extern bool mptcp_pm_add_addr_recv(struct mptcp_sock *msk) __ksym;
+extern void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) __ksym;
+extern void mptcp_pm_rm_addr_recv(struct mptcp_sock *msk) __ksym;
+extern int mptcp_pm_nl_append_new_local_addr_msk(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *entry,
+ bool needs_id, bool replace) __ksym;
+extern struct mptcp_pm_addr_entry *
+mptcp_pm_nl_lookup_addr(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *info) __ksym;
+
+extern struct mptcp_pm_addr_entry *
+bpf_kmemdup_entry(struct mptcp_pm_addr_entry *entry,
+ int size, gfp_t priority) __ksym;
+extern void
+bpf_kfree_entry(struct mptcp_pm_addr_entry *entry) __ksym;
+
+static void mptcp_pm_copy_addr(struct mptcp_addr_info *dst,
+ const struct mptcp_addr_info *src)
+{
+ dst->id = src->id;
+ dst->family = src->family;
+ dst->port = src->port;
+
+ if (src->family == AF_INET) {
+ dst->addr.s_addr = src->addr.s_addr;
+ } else if (src->family == AF_INET6) {
+ dst->addr6.s6_addr32[0] = src->addr6.s6_addr32[0];
+ dst->addr6.s6_addr32[1] = src->addr6.s6_addr32[1];
+ dst->addr6.s6_addr32[2] = src->addr6.s6_addr32[2];
+ dst->addr6.s6_addr32[3] = src->addr6.s6_addr32[3];
+ }
+}
+
+SEC("struct_ops")
+int BPF_PROG(mptcp_pm_netlink_get_local_id, struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *skc)
+{
+ struct mptcp_pm_addr_entry *entry;
+ int ret;
+
+ bpf_rcu_read_lock();
+ entry = mptcp_pm_nl_lookup_addr(msk, &skc->addr);
+ ret = entry ? entry->addr.id : -1;
+ bpf_rcu_read_unlock();
+ if (ret >= 0)
+ return ret;
+
+ entry = bpf_kmemdup_entry(skc, sizeof(*skc), GFP_ATOMIC);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->addr.port = 0;
+ ret = mptcp_pm_nl_append_new_local_addr_msk(msk, entry, true, false);
+ if (ret < 0)
+ bpf_kfree_entry(entry);
+
+ return 0;
+}
+
+SEC("struct_ops")
+bool BPF_PROG(mptcp_pm_netlink_get_priority, struct mptcp_sock *msk,
+ struct mptcp_addr_info *skc)
+{
+ struct mptcp_pm_addr_entry *entry;
+ bool backup;
+
+ bpf_rcu_read_lock();
+ entry = mptcp_pm_nl_lookup_addr(msk, skc);
+ backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+ bpf_rcu_read_unlock();
+
+ return backup;
+}
+
+SEC("struct_ops")
+void BPF_PROG(mptcp_pm_netlink_established, struct mptcp_sock *msk)
+{
+ bpf_spin_lock_bh(&msk->pm.lock);
+ mptcp_pm_create_subflow_or_signal_addr(msk);
+ bpf_spin_unlock_bh(&msk->pm.lock);
+}
+
+SEC("struct_ops")
+void BPF_PROG(mptcp_pm_netlink_subflow_established, struct mptcp_sock *msk)
+{
+ bpf_spin_lock_bh(&msk->pm.lock);
+ mptcp_pm_create_subflow_or_signal_addr(msk);
+ bpf_spin_unlock_bh(&msk->pm.lock);
+}
+
+SEC("struct_ops")
+bool BPF_PROG(mptcp_pm_netlink_allow_new_subflow, struct mptcp_sock *msk)
+{
+ struct mptcp_pm_data *pm = &msk->pm;
+ unsigned int subflows_max;
+ int ret = 0;
+
+ subflows_max = mptcp_pm_get_subflows_max(msk);
+
+ /* try to avoid acquiring the lock below */
+ if (!READ_ONCE(pm->accept_subflow))
+ return false;
+
+ bpf_spin_lock_bh(&pm->lock);
+ if (READ_ONCE(pm->accept_subflow)) {
+ ret = pm->subflows < subflows_max;
+ if (ret && ++pm->subflows == subflows_max)
+ WRITE_ONCE(pm->accept_subflow, false);
+ }
+ bpf_spin_unlock_bh(&pm->lock);
+
+ return ret;
+}
+
+SEC("struct_ops")
+bool BPF_PROG(mptcp_pm_netlink_accept_new_subflow, const struct mptcp_sock *msk)
+{
+ return READ_ONCE(msk->pm.accept_subflow);
+}
+
+SEC("struct_ops")
+bool BPF_PROG(mptcp_pm_netlink_add_addr_echo, struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr)
+{
+ return (addr->id == 0 && !mptcp_pm_is_init_remote_addr(msk, addr)) ||
+ (addr->id > 0 && !READ_ONCE(msk->pm.accept_addr));
+}
+
+SEC("struct_ops")
+int BPF_PROG(mptcp_pm_netlink_add_addr_received, struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr)
+{
+ int ret = 0;
+
+ if (mptcp_pm_add_addr_recv(msk))
+ mptcp_pm_copy_addr(&msk->pm.remote, addr);
+ else
+ ret = -EINVAL;
+ return ret;
+}
+
+SEC("struct_ops")
+void BPF_PROG(mptcp_pm_netlink_rm_addr_received, struct mptcp_sock *msk)
+{
+ mptcp_pm_rm_addr_recv(msk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(mptcp_pm_netlink_init, struct mptcp_sock *msk)
+{
+ bool subflows_allowed = !!mptcp_pm_get_subflows_max(msk);
+ struct mptcp_pm_data *pm = &msk->pm;
+
+ bpf_printk("BPF netlink PM (%s)",
+ CONFIG_MPTCP_IPV6 ? "IPv6" : "IPv4");
+
+ WRITE_ONCE(pm->work_pending,
+ (!!mptcp_pm_get_local_addr_max(msk) &&
+ subflows_allowed) ||
+ !!mptcp_pm_get_add_addr_signal_max(msk));
+ WRITE_ONCE(pm->accept_addr,
+ !!mptcp_pm_get_add_addr_accept_max(msk) &&
+ subflows_allowed);
+ WRITE_ONCE(pm->accept_subflow, subflows_allowed);
+
+ bpf_bitmap_fill(pm->id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+}
+
+SEC(".struct_ops.link")
+struct mptcp_pm_ops bpf_netlink = {
+ .get_local_id = (void *)mptcp_pm_netlink_get_local_id,
+ .get_priority = (void *)mptcp_pm_netlink_get_priority,
+ .established = (void *)mptcp_pm_netlink_established,
+ .subflow_established = (void *)mptcp_pm_netlink_subflow_established,
+ .allow_new_subflow = (void *)mptcp_pm_netlink_allow_new_subflow,
+ .accept_new_subflow = (void *)mptcp_pm_netlink_accept_new_subflow,
+ .add_addr_echo = (void *)mptcp_pm_netlink_add_addr_echo,
+ .add_addr_received = (void *)mptcp_pm_netlink_add_addr_received,
+ .rm_addr_received = (void *)mptcp_pm_netlink_rm_addr_received,
+ .init = (void *)mptcp_pm_netlink_init,
+ .name = "bpf_netlink",
+};
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_pm.h b/tools/testing/selftests/bpf/progs/mptcp_bpf_pm.h
new file mode 100644
index 000000000000..0ba21c743a13
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_pm.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __MPTCP_BPF_PM_H__
+#define __MPTCP_BPF_PM_H__
+
+#include "bpf_tracing_net.h"
+
+/* mptcp helpers from include/net/mptcp.h */
+#define U8_MAX ((u8)~0U)
+
+/* max value of mptcp_addr_info.id */
+#define MPTCP_PM_MAX_ADDR_ID U8_MAX
+
+/* mptcp macros from include/uapi/linux/mptcp.h */
+#define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0)
+#define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1)
+#define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2)
+#define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3)
+#define MPTCP_PM_ADDR_FLAG_IMPLICIT (1 << 4)
+
+extern void bpf_set_bit(unsigned long nr, unsigned long *addr) __ksym;
+
+extern int mptcp_pm_remove_addr(struct mptcp_sock *msk,
+ const struct mptcp_rm_list *rm_list) __ksym;
+
+#define ipv6_addr_equal(a, b) ((a).s6_addr32[0] == (b).s6_addr32[0] && \
+ (a).s6_addr32[1] == (b).s6_addr32[1] && \
+ (a).s6_addr32[2] == (b).s6_addr32[2] && \
+ (a).s6_addr32[3] == (b).s6_addr32[3])
+
+static __always_inline bool
+mptcp_addresses_equal(const struct mptcp_addr_info *a,
+ const struct mptcp_addr_info *b, bool use_port)
+{
+ bool addr_equals = false;
+
+ if (a->family == b->family) {
+ if (a->family == AF_INET)
+ addr_equals = a->addr.s_addr == b->addr.s_addr;
+ else
+ addr_equals = ipv6_addr_equal(a->addr6, b->addr6);
+ }
+
+ if (!addr_equals)
+ return false;
+ if (!use_port)
+ return true;
+
+ return a->port == b->port;
+}
+
+#endif
--
2.43.0
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: [PATCH mptcp-next v1 0/4] BPF path manager, part 7
2025-03-21 1:49 [PATCH mptcp-next v1 0/4] BPF path manager, part 7 Geliang Tang
` (3 preceding siblings ...)
2025-03-21 1:49 ` [PATCH mptcp-next v1 4/4] selftests/bpf: Add mptcp bpf_netlink " Geliang Tang
@ 2025-03-21 2:20 ` MPTCP CI
2025-03-21 2:59 ` MPTCP CI
5 siblings, 0 replies; 15+ messages in thread
From: MPTCP CI @ 2025-03-21 2:20 UTC (permalink / raw)
To: Geliang Tang; +Cc: mptcp
Hi Geliang,
Thank you for your modifications, that's great!
But sadly, our CI spotted some issues with it when trying to build it.
You can find more details there:
https://github.com/multipath-tcp/mptcp_net-next/actions/runs/13983028068
Status: failure
Initiator: Patchew Applier
Commits: https://github.com/multipath-tcp/mptcp_net-next/commits/0d2604c2d943
Patchwork: https://patchwork.kernel.org/project/mptcp/list/?series=946095
Feel free to reply to this email if you cannot access logs, if you need
some support to fix the error, if this doesn't seem to be caused by your
modifications or if the error is a false positive one.
Cheers,
MPTCP GH Action bot
Bot operated by Matthieu Baerts (NGI0 Core)
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [PATCH mptcp-next v1 0/4] BPF path manager, part 7
2025-03-21 1:49 [PATCH mptcp-next v1 0/4] BPF path manager, part 7 Geliang Tang
` (4 preceding siblings ...)
2025-03-21 2:20 ` [PATCH mptcp-next v1 0/4] BPF path manager, part 7 MPTCP CI
@ 2025-03-21 2:59 ` MPTCP CI
5 siblings, 0 replies; 15+ messages in thread
From: MPTCP CI @ 2025-03-21 2:59 UTC (permalink / raw)
To: Geliang Tang; +Cc: mptcp
Hi Geliang,
Thank you for your modifications, that's great!
Our CI did some validations and here is its report:
- KVM Validation: normal: Success! ✅
- KVM Validation: debug: Success! ✅
- KVM Validation: btf-normal (only bpftest_all): Success! ✅
- KVM Validation: btf-debug (only bpftest_all): Success! ✅
- Task: https://github.com/multipath-tcp/mptcp_net-next/actions/runs/13983028079
Initiator: Patchew Applier
Commits: https://github.com/multipath-tcp/mptcp_net-next/commits/0d2604c2d943
Patchwork: https://patchwork.kernel.org/project/mptcp/list/?series=946095
If there are some issues, you can reproduce them using the same environment as
the one used by the CI thanks to a docker image, e.g.:
$ cd [kernel source code]
$ docker run -v "${PWD}:${PWD}:rw" -w "${PWD}" --privileged --rm -it \
--pull always mptcp/mptcp-upstream-virtme-docker:latest \
auto-normal
For more details:
https://github.com/multipath-tcp/mptcp-upstream-virtme-docker
Please note that despite all the efforts that have been already done to have a
stable tests suite when executed on a public CI like here, it is possible some
reported issues are not due to your modifications. Still, do not hesitate to
help us improve that ;-)
Cheers,
MPTCP GH Action bot
Bot operated by Matthieu Baerts (NGI0 Core)
^ permalink raw reply [flat|nested] 15+ messages in thread