* [PATCH mptcp-next v9 1/3] mptcp: add subflows array in mptcp_sched_data
2022-04-29 14:51 [PATCH mptcp-next v9 0/3] BPF round-robin scheduler Geliang Tang
@ 2022-04-29 14:51 ` Geliang Tang
2022-04-30 0:30 ` Mat Martineau
2022-04-29 14:51 ` [PATCH mptcp-next v9 2/3] selftests: bpf: add bpf_rr scheduler Geliang Tang
2022-04-29 14:51 ` [PATCH mptcp-next v9 3/3] selftests: bpf: add bpf_rr test Geliang Tang
2 siblings, 1 reply; 6+ messages in thread
From: Geliang Tang @ 2022-04-29 14:51 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch adds an array of subflow pointers to struct mptcp_sched_data. The
array is filled in before get_subflow() is invoked, so that get_subflow() can
read it in BPF context.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
include/net/mptcp.h | 3 +++
net/mptcp/protocol.h | 16 ++++++++++++++++
tools/testing/selftests/bpf/bpf_mptcp_helpers.h | 3 +++
3 files changed, 22 insertions(+)
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index bea7608d72d3..1a48e31f3ac7 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -96,10 +96,13 @@ struct mptcp_out_options {
};
#define MPTCP_SCHED_NAME_MAX 16
+#define MPTCP_SUBFLOWS_MAX 8
struct mptcp_sched_data {
struct sock *sock;
bool call_again;
+ u8 subflows;
+ struct mptcp_subflow_context *array[MPTCP_SUBFLOWS_MAX];
};
struct mptcp_sched_ops {
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0da2a91ad197..95a65582f1e8 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -993,7 +993,9 @@ struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk);
static inline struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
{
+ struct mptcp_subflow_context *subflow;
struct mptcp_sched_data data;
+ int i = 0;
sock_owned_by_me((struct sock *)msk);
@@ -1007,14 +1009,22 @@ static inline struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
if (!msk->sched)
return mptcp_subflow_get_send(msk);
+ mptcp_for_each_subflow(msk, subflow)
+ data.array[i++] = subflow;
+ data.subflows = i;
msk->sched->get_subflow(msk, false, &data);
+ for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++)
+ data.array[i++] = NULL;
+ data.subflows = 0;
return data.sock;
}
static inline struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
{
+ struct mptcp_subflow_context *subflow;
struct mptcp_sched_data data;
+ int i = 0;
sock_owned_by_me((const struct sock *)msk);
@@ -1025,7 +1035,13 @@ static inline struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
if (!msk->sched)
return mptcp_subflow_get_retrans(msk);
+ mptcp_for_each_subflow(msk, subflow)
+ data.array[i++] = subflow;
+ data.subflows = i;
msk->sched->get_subflow(msk, true, &data);
+ for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++)
+ data.array[i++] = NULL;
+ data.subflows = 0;
return data.sock;
}
diff --git a/tools/testing/selftests/bpf/bpf_mptcp_helpers.h b/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
index ee07caf978b3..1fe3d0a97429 100644
--- a/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
@@ -7,10 +7,13 @@
#include "bpf_tcp_helpers.h"
#define MPTCP_SCHED_NAME_MAX 16
+#define MPTCP_SUBFLOWS_MAX 8
struct mptcp_sched_data {
struct sock *sock;
bool call_again;
+ __u8 subflows;
+ struct mptcp_subflow_context *array[MPTCP_SUBFLOWS_MAX];
};
struct mptcp_sched_ops {
--
2.34.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH mptcp-next v9 1/3] mptcp: add subflows array in mptcp_sched_data
2022-04-29 14:51 ` [PATCH mptcp-next v9 1/3] mptcp: add subflows array in mptcp_sched_data Geliang Tang
@ 2022-04-30 0:30 ` Mat Martineau
0 siblings, 0 replies; 6+ messages in thread
From: Mat Martineau @ 2022-04-30 0:30 UTC (permalink / raw)
To: Geliang Tang; +Cc: mptcp
On Fri, 29 Apr 2022, Geliang Tang wrote:
> This patch adds an array of subflow pointers to struct mptcp_sched_data. The
> array is filled in before get_subflow() is invoked, so that get_subflow() can
> read it in BPF context.
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> include/net/mptcp.h | 3 +++
> net/mptcp/protocol.h | 16 ++++++++++++++++
> tools/testing/selftests/bpf/bpf_mptcp_helpers.h | 3 +++
> 3 files changed, 22 insertions(+)
>
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> index bea7608d72d3..1a48e31f3ac7 100644
> --- a/include/net/mptcp.h
> +++ b/include/net/mptcp.h
> @@ -96,10 +96,13 @@ struct mptcp_out_options {
> };
>
> #define MPTCP_SCHED_NAME_MAX 16
> +#define MPTCP_SUBFLOWS_MAX 8
>
> struct mptcp_sched_data {
> struct sock *sock;
> bool call_again;
> + u8 subflows;
> + struct mptcp_subflow_context *array[MPTCP_SUBFLOWS_MAX];
> };
>
> struct mptcp_sched_ops {
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 0da2a91ad197..95a65582f1e8 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -993,7 +993,9 @@ struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk);
>
> static inline struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> {
> + struct mptcp_subflow_context *subflow;
> struct mptcp_sched_data data;
> + int i = 0;
>
> sock_owned_by_me((struct sock *)msk);
>
> @@ -1007,14 +1009,22 @@ static inline struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> if (!msk->sched)
> return mptcp_subflow_get_send(msk);
>
> + mptcp_for_each_subflow(msk, subflow)
> + data.array[i++] = subflow;
> + data.subflows = i;
As I mentioned in a previous patch, a helper function here would be good.
> msk->sched->get_subflow(msk, false, &data);
> + for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++)
> + data.array[i++] = NULL;
> + data.subflows = 0;
Why zero out this data on the stack if BPF does not have write access to
it?
- Mat
>
> return data.sock;
> }
>
> static inline struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> {
> + struct mptcp_subflow_context *subflow;
> struct mptcp_sched_data data;
> + int i = 0;
>
> sock_owned_by_me((const struct sock *)msk);
>
> @@ -1025,7 +1035,13 @@ static inline struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> if (!msk->sched)
> return mptcp_subflow_get_retrans(msk);
>
> + mptcp_for_each_subflow(msk, subflow)
> + data.array[i++] = subflow;
> + data.subflows = i;
> msk->sched->get_subflow(msk, true, &data);
> + for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++)
> + data.array[i++] = NULL;
> + data.subflows = 0;
>
> return data.sock;
> }
> diff --git a/tools/testing/selftests/bpf/bpf_mptcp_helpers.h b/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
> index ee07caf978b3..1fe3d0a97429 100644
> --- a/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
> +++ b/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
> @@ -7,10 +7,13 @@
> #include "bpf_tcp_helpers.h"
>
> #define MPTCP_SCHED_NAME_MAX 16
> +#define MPTCP_SUBFLOWS_MAX 8
>
> struct mptcp_sched_data {
> struct sock *sock;
> bool call_again;
> + __u8 subflows;
> + struct mptcp_subflow_context *array[MPTCP_SUBFLOWS_MAX];
> };
>
> struct mptcp_sched_ops {
> --
> 2.34.1
>
>
>
--
Mat Martineau
Intel
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH mptcp-next v9 2/3] selftests: bpf: add bpf_rr scheduler
2022-04-29 14:51 [PATCH mptcp-next v9 0/3] BPF round-robin scheduler Geliang Tang
2022-04-29 14:51 ` [PATCH mptcp-next v9 1/3] mptcp: add subflows array in mptcp_sched_data Geliang Tang
@ 2022-04-29 14:51 ` Geliang Tang
2022-04-30 0:28 ` Mat Martineau
2022-04-29 14:51 ` [PATCH mptcp-next v9 3/3] selftests: bpf: add bpf_rr test Geliang Tang
2 siblings, 1 reply; 6+ messages in thread
From: Geliang Tang @ 2022-04-29 14:51 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch implements the round-robin BPF MPTCP scheduler, named bpf_rr,
which always picks the next available subflow to send data on. If no
next subflow is available, it picks the first one.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
.../testing/selftests/bpf/bpf_mptcp_helpers.h | 5 ++
.../selftests/bpf/progs/mptcp_bpf_rr.c | 49 +++++++++++++++++++
2 files changed, 54 insertions(+)
create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
diff --git a/tools/testing/selftests/bpf/bpf_mptcp_helpers.h b/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
index 1fe3d0a97429..8ce5b1603962 100644
--- a/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
@@ -37,4 +37,9 @@ struct mptcp_sock {
char ca_name[TCP_CA_NAME_MAX];
} __attribute__((preserve_access_index));
+struct mptcp_subflow_context {
+ __u32 token;
+ struct sock *tcp_sock; /* tcp sk backpointer */
+} __attribute__((preserve_access_index));
+
#endif
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
new file mode 100644
index 000000000000..2f31de4cfc84
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, SUSE. */
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
+#include "bpf_mptcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/mptcp_sched_rr_init")
+void BPF_PROG(mptcp_sched_rr_init, struct mptcp_sock *msk)
+{
+}
+
+SEC("struct_ops/mptcp_sched_rr_release")
+void BPF_PROG(mptcp_sched_rr_release, struct mptcp_sock *msk)
+{
+}
+
+void BPF_STRUCT_OPS(bpf_rr_get_subflow, struct mptcp_sock *msk,
+ bool reinject, struct mptcp_sched_data *data)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *ssk = msk->first;
+
+ for (int i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
+ if (i >= data->subflows)
+ break;
+
+ subflow = data->array[i];
+ if (subflow->tcp_sock != msk->last_snd) {
+ ssk = subflow->tcp_sock;
+ break;
+ }
+ }
+
+ msk->last_snd = ssk;
+ data->sock = ssk;
+ data->call_again = 0;
+}
+
+SEC(".struct_ops")
+struct mptcp_sched_ops rr = {
+ .init = (void *)mptcp_sched_rr_init,
+ .release = (void *)mptcp_sched_rr_release,
+ .get_subflow = (void *)bpf_rr_get_subflow,
+ .name = "bpf_rr",
+};
--
2.34.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH mptcp-next v9 2/3] selftests: bpf: add bpf_rr scheduler
2022-04-29 14:51 ` [PATCH mptcp-next v9 2/3] selftests: bpf: add bpf_rr scheduler Geliang Tang
@ 2022-04-30 0:28 ` Mat Martineau
0 siblings, 0 replies; 6+ messages in thread
From: Mat Martineau @ 2022-04-30 0:28 UTC (permalink / raw)
To: Geliang Tang; +Cc: mptcp
On Fri, 29 Apr 2022, Geliang Tang wrote:
> This patch implements the round-robin BPF MPTCP scheduler, named bpf_rr,
> which always picks the next available subflow to send data on. If no
> next subflow is available, it picks the first one.
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> .../testing/selftests/bpf/bpf_mptcp_helpers.h | 5 ++
> .../selftests/bpf/progs/mptcp_bpf_rr.c | 49 +++++++++++++++++++
> 2 files changed, 54 insertions(+)
> create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
>
> diff --git a/tools/testing/selftests/bpf/bpf_mptcp_helpers.h b/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
> index 1fe3d0a97429..8ce5b1603962 100644
> --- a/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
> +++ b/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
> @@ -37,4 +37,9 @@ struct mptcp_sock {
> char ca_name[TCP_CA_NAME_MAX];
> } __attribute__((preserve_access_index));
>
> +struct mptcp_subflow_context {
> + __u32 token;
> + struct sock *tcp_sock; /* tcp sk backpointer */
> +} __attribute__((preserve_access_index));
> +
> #endif
> diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
> new file mode 100644
> index 000000000000..2f31de4cfc84
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
> @@ -0,0 +1,49 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2022, SUSE. */
> +
> +#include <linux/bpf.h>
> +#include <linux/stddef.h>
> +#include <linux/tcp.h>
> +#include "bpf_mptcp_helpers.h"
> +
> +char _license[] SEC("license") = "GPL";
> +
> +SEC("struct_ops/mptcp_sched_rr_init")
> +void BPF_PROG(mptcp_sched_rr_init, struct mptcp_sock *msk)
> +{
> +}
> +
> +SEC("struct_ops/mptcp_sched_rr_release")
> +void BPF_PROG(mptcp_sched_rr_release, struct mptcp_sock *msk)
> +{
> +}
> +
> +void BPF_STRUCT_OPS(bpf_rr_get_subflow, struct mptcp_sock *msk,
> + bool reinject, struct mptcp_sched_data *data)
> +{
> + struct mptcp_subflow_context *subflow;
> + struct sock *ssk = msk->first;
> +
> + for (int i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> + if (i >= data->subflows)
> + break;
Is the extra 'if' statement required by the BPF verifier? If this is not
allowed:
for (int i = 0; i < MPTCP_SUBFLOWS_MAX; i++)
is this?
for (int i = 0; i < MPTCP_SUBFLOWS_MAX && i < data->subflows; i++)
> +
> + subflow = data->array[i];
> + if (subflow->tcp_sock != msk->last_snd) {
> + ssk = subflow->tcp_sock;
> + break;
> + }
> + }
> +
> + msk->last_snd = ssk;
> + data->sock = ssk;
> + data->call_again = 0;
> +}
> +
> +SEC(".struct_ops")
> +struct mptcp_sched_ops rr = {
> + .init = (void *)mptcp_sched_rr_init,
> + .release = (void *)mptcp_sched_rr_release,
> + .get_subflow = (void *)bpf_rr_get_subflow,
> + .name = "bpf_rr",
> +};
> --
> 2.34.1
>
>
>
--
Mat Martineau
Intel
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH mptcp-next v9 3/3] selftests: bpf: add bpf_rr test
2022-04-29 14:51 [PATCH mptcp-next v9 0/3] BPF round-robin scheduler Geliang Tang
2022-04-29 14:51 ` [PATCH mptcp-next v9 1/3] mptcp: add subflows array in mptcp_sched_data Geliang Tang
2022-04-29 14:51 ` [PATCH mptcp-next v9 2/3] selftests: bpf: add bpf_rr scheduler Geliang Tang
@ 2022-04-29 14:51 ` Geliang Tang
2 siblings, 0 replies; 6+ messages in thread
From: Geliang Tang @ 2022-04-29 14:51 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch adds a test for the round-robin BPF MPTCP scheduler. The test uses
sysctl to set net.mptcp.scheduler so that this scheduler is selected, adds a
veth net device to simulate the multiple-addresses case, and uses the
'ip mptcp endpoint' command to add the new endpoint to PM netlink.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
.../testing/selftests/bpf/prog_tests/mptcp.c | 37 +++++++++++++++++++
1 file changed, 37 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 44484a63e62a..fb4a4b37e818 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -5,6 +5,7 @@
#include "cgroup_helpers.h"
#include "network_helpers.h"
#include "mptcp_bpf_first.skel.h"
+#include "mptcp_bpf_rr.skel.h"
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX 16
@@ -361,10 +362,46 @@ static void test_first(void)
mptcp_bpf_first__destroy(first_skel);
}
+static void test_rr(void)
+{
+ struct mptcp_bpf_rr *rr_skel;
+ int server_fd, client_fd;
+ struct bpf_link *link;
+
+ rr_skel = mptcp_bpf_rr__open_and_load();
+ if (CHECK(!rr_skel, "bpf_rr__open_and_load", "failed\n"))
+ return;
+
+ link = bpf_map__attach_struct_ops(rr_skel->maps.rr);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
+ mptcp_bpf_rr__destroy(rr_skel);
+ return;
+ }
+
+ system("ip link add veth1 type veth; \
+ ip addr add 10.0.1.1/24 dev veth1; \
+ ip link set veth1 up");
+ system("ip mptcp endpoint add 10.0.1.1 subflow");
+ system("sysctl -q net.mptcp.scheduler=bpf_rr");
+ server_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+ client_fd = connect_to_mptcp_fd(server_fd, 0);
+
+ send_data(server_fd, client_fd);
+
+ close(client_fd);
+ close(server_fd);
+ system("ip mptcp endpoint flush");
+ system("ip link del veth1");
+ bpf_link__destroy(link);
+ mptcp_bpf_rr__destroy(rr_skel);
+}
+
void test_mptcp(void)
{
if (test__start_subtest("base"))
test_base();
if (test__start_subtest("first"))
test_first();
+ if (test__start_subtest("rr"))
+ test_rr();
}
--
2.34.1
^ permalink raw reply related [flat|nested] 6+ messages in thread