* [PATCH mptcp-next v8 1/3] mptcp: add bpf get_subflows helper
2022-04-22 7:47 [PATCH mptcp-next v8 0/3] BPF round-robin scheduler Geliang Tang
@ 2022-04-22 7:47 ` Geliang Tang
2022-04-26 0:42 ` Mat Martineau
2022-04-22 7:47 ` [PATCH mptcp-next v8 2/3] selftests: bpf: add bpf_rr scheduler Geliang Tang
2022-04-22 7:47 ` [PATCH mptcp-next v8 3/3] selftests: bpf: add bpf_rr test Geliang Tang
2 siblings, 1 reply; 5+ messages in thread
From: Geliang Tang @ 2022-04-22 7:47 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch implements two new helpers: bpf_mptcp_get_subflows_array()
allocates a struct mptcp_subflows_array and fills it with pointers to the
subflows of the given mptcp_sock, and bpf_mptcp_put_subflows_array() frees
that array. The new struct holds the number of subflows (nr) and a
fixed-size array of MPTCP_SUBFLOWS_MAX subflow pointers.
Register both helpers in bpf_mptcp_sched_kfunc_init() to make sure they can
be accessed from the BPF context.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
net/mptcp/bpf.c | 47 ++++++++++++++++++++++++++++++++++++++++++++
net/mptcp/protocol.h | 7 +++++++
2 files changed, 54 insertions(+)
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index 6c01f6b959a3..3367541b353c 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -160,6 +160,23 @@ struct bpf_struct_ops bpf_mptcp_sched_ops = {
.name = "mptcp_sched_ops",
};
+BTF_SET_START(bpf_mptcp_sched_kfunc_ids) /* BTF IDs of the two subflow-array helpers */
+BTF_ID(func, bpf_mptcp_get_subflows_array)
+BTF_ID(func, bpf_mptcp_put_subflows_array)
+BTF_SET_END(bpf_mptcp_sched_kfunc_ids)
+/* Kfunc set wrapping the ID list above; handed to the registration call below. */
+static const struct btf_kfunc_id_set bpf_mptcp_sched_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &bpf_mptcp_sched_kfunc_ids,
+};
+/* Registers the set for BPF_PROG_TYPE_STRUCT_OPS programs; hooked up through late_initcall(). */
+static int __init bpf_mptcp_sched_kfunc_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
+ &bpf_mptcp_sched_kfunc_set);
+}
+late_initcall(bpf_mptcp_sched_kfunc_init);
+
struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
{
if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk))
@@ -168,3 +185,33 @@ struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
return NULL;
}
EXPORT_SYMBOL(bpf_mptcp_sock_from_subflow);
+
+/**
+ * bpf_mptcp_get_subflows_array - snapshot the subflows of an MPTCP socket
+ * @msk: MPTCP socket whose subflow list is walked
+ *
+ * Allocates a zeroed struct mptcp_subflows_array and stores at most
+ * MPTCP_SUBFLOWS_MAX subflow context pointers in it, setting ->nr to the
+ * number of entries stored. Subflows beyond that limit are not reported.
+ *
+ * Return: the new array, to be released with bpf_mptcp_put_subflows_array(),
+ * or NULL if the allocation failed.
+ */
+struct mptcp_subflows_array *bpf_mptcp_get_subflows_array(struct mptcp_sock *msk)
+{
+	struct mptcp_subflow_context *subflow;
+	struct mptcp_subflows_array *array;
+
+	/* NOTE(review): GFP_KERNEL may sleep - confirm every caller runs in a sleepable context. */
+	array = kzalloc(sizeof(*array), GFP_KERNEL);
+	if (!array)
+		return NULL;
+
+	mptcp_for_each_subflow(msk, subflow) {
+		/* Never write past the fixed-size subflows[] array. */
+		if (array->nr >= MPTCP_SUBFLOWS_MAX)
+			break;
+		array->subflows[array->nr++] = subflow;
+	}
+
+	return array;
+}
+EXPORT_SYMBOL(bpf_mptcp_get_subflows_array);
+
+/**
+ * bpf_mptcp_put_subflows_array - free an array from bpf_mptcp_get_subflows_array()
+ * @array: array to release; may be NULL
+ *
+ * Only the array itself is freed; the subflow contexts it points to are
+ * left untouched.
+ */
+void bpf_mptcp_put_subflows_array(struct mptcp_subflows_array *array)
+{
+	/*
+	 * kfree(NULL) is a no-op, and clearing the entries of a buffer that
+	 * is about to be freed has no effect, so a plain kfree() is enough.
+	 */
+	kfree(array);
+}
+EXPORT_SYMBOL(bpf_mptcp_put_subflows_array);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 006914cb78de..c42fb54298ef 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -500,6 +500,13 @@ struct mptcp_subflow_context {
struct rcu_head rcu;
};
+#define MPTCP_SUBFLOWS_MAX 8 /* number of slots in mptcp_subflows_array.subflows[] */
+/* Fixed-size collection of subflow context pointers. */
+struct mptcp_subflows_array {
+ u8 nr; /* count of entries filled in subflows[] */
+ struct mptcp_subflow_context *subflows[MPTCP_SUBFLOWS_MAX];
+};
+
static inline struct mptcp_subflow_context *
mptcp_subflow_ctx(const struct sock *sk)
{
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH mptcp-next v8 1/3] mptcp: add bpf get_subflows helper
2022-04-22 7:47 ` [PATCH mptcp-next v8 1/3] mptcp: add bpf get_subflows helper Geliang Tang
@ 2022-04-26 0:42 ` Mat Martineau
0 siblings, 0 replies; 5+ messages in thread
From: Mat Martineau @ 2022-04-26 0:42 UTC (permalink / raw)
To: Geliang Tang; +Cc: mptcp
On Fri, 22 Apr 2022, Geliang Tang wrote:
> This patch implements a new helper bpf_mptcp_get_subflows() to get all the
> subflows of the given mptcp_sock, it returns the number of suflows. Add
> a new member subflows in struct mptcp_sock as a pointers array of all the
> subflows.
>
> Register this helper in bpf_mptcp_sched_kfunc_init() to make sure it can
> be accessed from the BPF context.
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/bpf.c | 47 ++++++++++++++++++++++++++++++++++++++++++++
> net/mptcp/protocol.h | 7 +++++++
> 2 files changed, 54 insertions(+)
>
> diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
> index 6c01f6b959a3..3367541b353c 100644
> --- a/net/mptcp/bpf.c
> +++ b/net/mptcp/bpf.c
> @@ -160,6 +160,23 @@ struct bpf_struct_ops bpf_mptcp_sched_ops = {
> .name = "mptcp_sched_ops",
> };
>
> +BTF_SET_START(bpf_mptcp_sched_kfunc_ids)
> +BTF_ID(func, bpf_mptcp_get_subflows_array)
> +BTF_ID(func, bpf_mptcp_put_subflows_array)
> +BTF_SET_END(bpf_mptcp_sched_kfunc_ids)
> +
> +static const struct btf_kfunc_id_set bpf_mptcp_sched_kfunc_set = {
> + .owner = THIS_MODULE,
> + .check_set = &bpf_mptcp_sched_kfunc_ids,
> +};
> +
> +static int __init bpf_mptcp_sched_kfunc_init(void)
> +{
> + return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
> + &bpf_mptcp_sched_kfunc_set);
> +}
> +late_initcall(bpf_mptcp_sched_kfunc_init);
> +
> struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
> {
> if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk))
> @@ -168,3 +185,33 @@ struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
> return NULL;
> }
> EXPORT_SYMBOL(bpf_mptcp_sock_from_subflow);
> +
> +struct mptcp_subflows_array *bpf_mptcp_get_subflows_array(struct mptcp_sock *msk)
> +{
> + struct mptcp_subflow_context *subflow;
> + struct mptcp_subflows_array *array;
> +
> + array = kzalloc(sizeof(*array), GFP_KERNEL);
> + if (!array)
> + return array;
> +
> + mptcp_for_each_subflow(msk, subflow)
> + array->subflows[array->nr++] = subflow;
> +
> + return array;
> +}
> +EXPORT_SYMBOL(bpf_mptcp_get_subflows_array);
> +
> +void bpf_mptcp_put_subflows_array(struct mptcp_subflows_array *array)
> +{
> + int i;
> +
> + if (!array)
> + return;
> +
> + for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++)
> + array->subflows[i] = NULL;
> + array->nr = 0;
> + kfree(array);
We can't trust the caller to always call this function, since we don't
want to allow userspace to leak this array memory either accidentally or
intentionally.
Can BPF code call a helper with a pointer to a struct on the BPF stack?
> +}
> +EXPORT_SYMBOL(bpf_mptcp_put_subflows_array);
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 006914cb78de..c42fb54298ef 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -500,6 +500,13 @@ struct mptcp_subflow_context {
> struct rcu_head rcu;
> };
>
> +#define MPTCP_SUBFLOWS_MAX 8
I noticed that this value is separately defined here and in the next
commit (tools/testing/selftests/bpf/bpf_mptcp_helpers.h). That seems
fragile to me, there's no way for bpf_mptcp_sched_btf_struct_access() to
enforce access limits correctly.
Do you have some other ideas for how to share this data between kernel and
BPF contexts? Maybe mptcp_sched_data could be expanded to pass the subflow
list/array in to the scheduler function.
> +
> +struct mptcp_subflows_array {
> + u8 nr;
> + struct mptcp_subflow_context *subflows[MPTCP_SUBFLOWS_MAX];
> +};
> +
> static inline struct mptcp_subflow_context *
> mptcp_subflow_ctx(const struct sock *sk)
> {
> --
> 2.34.1
>
>
>
--
Mat Martineau
Intel
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH mptcp-next v8 2/3] selftests: bpf: add bpf_rr scheduler
2022-04-22 7:47 [PATCH mptcp-next v8 0/3] BPF round-robin scheduler Geliang Tang
2022-04-22 7:47 ` [PATCH mptcp-next v8 1/3] mptcp: add bpf get_subflows helper Geliang Tang
@ 2022-04-22 7:47 ` Geliang Tang
2022-04-22 7:47 ` [PATCH mptcp-next v8 3/3] selftests: bpf: add bpf_rr test Geliang Tang
2 siblings, 0 replies; 5+ messages in thread
From: Geliang Tang @ 2022-04-22 7:47 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch implements the round-robin BPF MPTCP scheduler, named bpf_rr,
which always picks the next available subflow to send data. If no such
next subflow is available, it picks the first one.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
.../testing/selftests/bpf/bpf_mptcp_helpers.h | 13 +++++
.../selftests/bpf/progs/mptcp_bpf_rr.c | 55 +++++++++++++++++++
2 files changed, 68 insertions(+)
create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
diff --git a/tools/testing/selftests/bpf/bpf_mptcp_helpers.h b/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
index a0b83fbe8133..f0e120dfe48d 100644
--- a/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
@@ -27,10 +27,23 @@ struct mptcp_sched_ops {
struct mptcp_sock {
struct inet_connection_sock sk;
+ struct sock *last_snd;
__u32 token;
struct sock *first;
struct mptcp_sched_ops *sched;
char ca_name[TCP_CA_NAME_MAX];
} __attribute__((preserve_access_index));
+struct mptcp_subflow_context { /* partial BPF-side view of the kernel struct of the same name */
+ __u32 token;
+ struct sock *tcp_sock; /* tcp sk backpointer */
+} __attribute__((preserve_access_index));
+/* Duplicates MPTCP_SUBFLOWS_MAX from net/mptcp/protocol.h; the two values have to be kept in sync by hand. */
+#define MPTCP_SUBFLOWS_MAX 8
+/* BPF-side copy of the kernel's struct mptcp_subflows_array. NOTE(review): no preserve_access_index here, so presumably the layout must match the kernel's exactly - confirm. */
+struct mptcp_subflows_array {
+ __u8 nr;
+ struct mptcp_subflow_context *subflows[MPTCP_SUBFLOWS_MAX];
+};
+
#endif
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
new file mode 100644
index 000000000000..e68e3753fb4a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, SUSE. */
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
+#include "bpf_mptcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+extern struct mptcp_subflows_array *
+bpf_mptcp_get_subflows_array(struct mptcp_sock *msk) __ksym;
+extern void bpf_mptcp_put_subflows_array(struct mptcp_subflows_array *array) __ksym;
+/* init program of the bpf_rr scheduler: empty body, nothing is set up per socket. */
+SEC("struct_ops/mptcp_sched_rr_init")
+void BPF_PROG(mptcp_sched_rr_init, struct mptcp_sock *msk)
+{
+}
+/* release program of the bpf_rr scheduler: empty body, nothing is torn down. */
+SEC("struct_ops/mptcp_sched_rr_release")
+void BPF_PROG(mptcp_sched_rr_release, struct mptcp_sock *msk)
+{
+}
+
+/*
+ * Round-robin subflow selection.
+ *
+ * Looks up the subflow whose socket was used for the previous send
+ * (msk->last_snd) and selects the one that follows it in the subflow array.
+ * When there is no previous sender, it is not in the array, or it is the
+ * last entry, the selection falls back to msk->first. The fallback is also
+ * used when the subflow array cannot be obtained.
+ *
+ * The result is stored in data->sock; data->call_again is cleared.
+ */
+void BPF_STRUCT_OPS(bpf_rr_get_subflow, struct mptcp_sock *msk,
+		    bool reinject, struct mptcp_sched_data *data)
+{
+	struct mptcp_subflow_context *subflow;
+	struct mptcp_subflows_array *array;
+	struct sock *ssk = msk->first;
+
+	array = bpf_mptcp_get_subflows_array(msk);
+	/* The helper returns NULL when its allocation fails. */
+	if (!array)
+		goto out;
+
+	for (int i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
+		if (i >= array->nr)
+			break;
+
+		subflow = array->subflows[i];
+		if (subflow->tcp_sock != msk->last_snd)
+			continue;
+
+		/* Found the previous sender: move on to its successor, if any. */
+		if (i + 1 < MPTCP_SUBFLOWS_MAX && i + 1 < array->nr)
+			ssk = array->subflows[i + 1]->tcp_sock;
+		break;
+	}
+	bpf_mptcp_put_subflows_array(array);
+
+out:
+	data->sock = ssk;
+	data->call_again = 0;
+}
+/* struct_ops map that wires the three programs above into a scheduler named "bpf_rr". */
+SEC(".struct_ops")
+struct mptcp_sched_ops rr = {
+ .init = (void *)mptcp_sched_rr_init,
+ .release = (void *)mptcp_sched_rr_release,
+ .get_subflow = (void *)bpf_rr_get_subflow,
+ .name = "bpf_rr",
+};
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH mptcp-next v8 3/3] selftests: bpf: add bpf_rr test
2022-04-22 7:47 [PATCH mptcp-next v8 0/3] BPF round-robin scheduler Geliang Tang
2022-04-22 7:47 ` [PATCH mptcp-next v8 1/3] mptcp: add bpf get_subflows helper Geliang Tang
2022-04-22 7:47 ` [PATCH mptcp-next v8 2/3] selftests: bpf: add bpf_rr scheduler Geliang Tang
@ 2022-04-22 7:47 ` Geliang Tang
2 siblings, 0 replies; 5+ messages in thread
From: Geliang Tang @ 2022-04-22 7:47 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch adds a test for the round-robin BPF MPTCP scheduler. It uses
sysctl to set net.mptcp.scheduler to bpf_rr so that this scheduler is
selected, adds a veth net device to simulate the multiple-addresses case,
and uses the 'ip mptcp endpoint' command to add the new endpoint to the
netlink PM.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
.../testing/selftests/bpf/prog_tests/mptcp.c | 37 +++++++++++++++++++
1 file changed, 37 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 377ebc0fbcbe..7052c520c0f7 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -6,6 +6,7 @@
#include "cgroup_helpers.h"
#include "network_helpers.h"
#include "mptcp_bpf_first.skel.h"
+#include "mptcp_bpf_rr.skel.h"
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX 16
@@ -362,10 +363,46 @@ static void test_first(void)
mptcp_bpf_first__destroy(first_skel);
}
+/*
+ * Load and attach the bpf_rr scheduler, add a second address on a veth
+ * device as an MPTCP subflow endpoint, select the scheduler via sysctl and
+ * push data over an MPTCP connection. Network setup is undone and the BPF
+ * objects are destroyed on every path once the scheduler is attached.
+ */
+static void test_rr(void)
+{
+	struct mptcp_bpf_rr *rr_skel;
+	int server_fd, client_fd;
+	struct bpf_link *link;
+
+	rr_skel = mptcp_bpf_rr__open_and_load();
+	if (CHECK(!rr_skel, "bpf_rr__open_and_load", "failed\n"))
+		return;
+
+	link = bpf_map__attach_struct_ops(rr_skel->maps.rr);
+	if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
+		mptcp_bpf_rr__destroy(rr_skel);
+		return;
+	}
+
+	system("ip link add veth1 type veth; \
+		ip addr add 10.0.1.1/24 dev veth1; \
+		ip link set veth1 up");
+	system("ip mptcp endpoint add 10.0.1.1 subflow");
+	system("sysctl -q net.mptcp.scheduler=bpf_rr");
+
+	/* Do not run traffic (or close()) on descriptors that were never opened. */
+	server_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+	if (CHECK(server_fd < 0, "start_mptcp_server", "failed\n"))
+		goto cleanup;
+
+	client_fd = connect_to_mptcp_fd(server_fd, 0);
+	if (CHECK(client_fd < 0, "connect_to_mptcp_fd", "failed\n"))
+		goto close_server;
+
+	send_data(server_fd, client_fd);
+
+	close(client_fd);
+close_server:
+	close(server_fd);
+cleanup:
+	system("ip mptcp endpoint flush");
+	system("ip link del veth1");
+	bpf_link__destroy(link);
+	mptcp_bpf_rr__destroy(rr_skel);
+}
+
void test_mptcp(void) /* runs each MPTCP subtest that test__start_subtest() enables */
{
if (test__start_subtest("base"))
test_base();
if (test__start_subtest("first"))
test_first();
+ if (test__start_subtest("rr")) /* BPF round-robin scheduler subtest */
+ test_rr();
}
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread