* [PATCH mptcp-next v2 0/8] BPF packet scheduler
@ 2022-03-18 2:58 Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 1/8] mptcp: add struct mptcp_sched_ops Geliang Tang
` (7 more replies)
0 siblings, 8 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18 2:58 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
v2:
- split into smaller patches.
- change all parameters of mptcp_sched_ops from sk to msk:
void (*init)(struct mptcp_sock *msk);
void (*release)(struct mptcp_sock *msk);
struct sock * (*get_subflow)(struct mptcp_sock *msk);
- add tests in bpf_tcp_ca.c, instead of adding a new one.
Depends on the bpf patches queued in the patchwork:
c42b1c51caa4 selftests: bpf: add bpf_first test
49747d1d690e selftests: bpf: add bpf_first scheduler
857cc219d855 mptcp: add bpf_mptcp_sched_ops
287b994479ea mptcp: add mptcp_get_subflow wrapper
a626e160359a mptcp: add sched for struct mptcp_sock
7613acc6e70e mptcp: add a new sysctl scheduler
70ff4322bee0 mptcp: register default scheduler
50a302c839de mptcp: add struct mptcp_sched_ops
2b41b6b9a948 selftests: bpf: verify first subflow of mptcp_sock
91e52c32a832 selftests: bpf: verify ca_name of struct mptcp_sock
07a66e354126 Squash to "selftests: bpf: test bpf_skc_to_mptcp_sock"
168ee1d1a118 selftests: bpf: test bpf_skc_to_mptcp_sock
7a773e6af914 Squash to "selftests: bpf: add MPTCP test base"
a1933407b912 bpf: add bpf_skc_to_mptcp_sock_proto
0095d8ca9a67 Revert "bpf: add 'bpf_mptcp_sock' structure and helper"
2856d8c8020a Revert "selftests: bpf: add bpf_mptcp_sock() verifier tests"
df0e2a4c394f (origin/export, origin/HEAD) DO-NOT-MERGE: mptcp: enabled by default
v1:
Addressed the comments on the RFC version:
https://patchwork.kernel.org/project/mptcp/cover/cover.1631011068.git.geliangtang@xiaomi.com/
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/75
Geliang Tang (8):
mptcp: add struct mptcp_sched_ops
mptcp: register default scheduler
mptcp: add a new sysctl scheduler
mptcp: add sched for struct mptcp_sock
mptcp: add mptcp_get_subflow wrapper
mptcp: add bpf_mptcp_sched_ops
selftests: bpf: add bpf_first scheduler
selftests: bpf: add bpf_first test
Documentation/networking/mptcp-sysctl.rst | 8 ++
include/net/mptcp.h | 13 +++
kernel/bpf/bpf_struct_ops_types.h | 4 +
net/mptcp/Makefile | 2 +-
net/mptcp/bpf.c | 102 ++++++++++++++++++
net/mptcp/ctrl.c | 14 +++
net/mptcp/protocol.c | 12 ++-
net/mptcp/protocol.h | 9 ++
net/mptcp/sched.c | 89 +++++++++++++++
tools/testing/selftests/bpf/bpf_tcp_helpers.h | 12 +++
.../selftests/bpf/prog_tests/bpf_tcp_ca.c | 42 +++++++-
tools/testing/selftests/bpf/progs/bpf_first.c | 24 +++++
12 files changed, 322 insertions(+), 9 deletions(-)
create mode 100644 net/mptcp/sched.c
create mode 100644 tools/testing/selftests/bpf/progs/bpf_first.c
--
2.34.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH mptcp-next v2 1/8] mptcp: add struct mptcp_sched_ops
2022-03-18 2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
@ 2022-03-18 2:58 ` Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 2/8] mptcp: register default scheduler Geliang Tang
` (6 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18 2:58 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch adds struct mptcp_sched_ops and defines the scheduler
register, unregister and find functions.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
include/net/mptcp.h | 13 ++++++++++
net/mptcp/Makefile | 2 +-
net/mptcp/protocol.h | 3 +++
net/mptcp/sched.c | 56 ++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 73 insertions(+), 1 deletion(-)
create mode 100644 net/mptcp/sched.c
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 8b1afd6f5cc4..e3a0baa8dbd7 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -95,6 +95,19 @@ struct mptcp_out_options {
#endif
};
+#define MPTCP_SCHED_NAME_MAX 16
+
+struct mptcp_sched_ops {
+ struct sock * (*get_subflow)(struct mptcp_sock *msk);
+
+ char name[MPTCP_SCHED_NAME_MAX];
+ struct module *owner;
+ struct list_head list;
+
+ void (*init)(struct mptcp_sock *msk);
+ void (*release)(struct mptcp_sock *msk);
+} ____cacheline_aligned_in_smp;
+
#ifdef CONFIG_MPTCP
extern struct request_sock_ops mptcp_subflow_request_sock_ops;
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index 0a0608b6b4b4..aa5c10d1b80a 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -3,7 +3,7 @@ obj-$(CONFIG_MPTCP) += mptcp.o
ccflags-y += -DDEBUG
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
- mib.o pm_netlink.o sockopt.o
+ mib.o pm_netlink.o sockopt.o sched.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index c8bada4537e2..500dc7b3fde8 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -605,6 +605,9 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family);
+struct mptcp_sched_ops *mptcp_sched_find(const char *name);
+int mptcp_register_scheduler(struct mptcp_sched_ops *sched);
+void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched);
static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
new file mode 100644
index 000000000000..88ddc4aba4ea
--- /dev/null
+++ b/net/mptcp/sched.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Multipath TCP
+ *
+ * Copyright (c) 2022, SUSE.
+ */
+
+#define pr_fmt(fmt) "MPTCP: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/indirect_call_wrapper.h>
+#include "protocol.h"
+
+static DEFINE_SPINLOCK(mptcp_sched_list_lock);
+static LIST_HEAD(mptcp_sched_list);
+
+struct mptcp_sched_ops *mptcp_sched_find(const char *name)
+{
+ struct mptcp_sched_ops *ops;
+
+ list_for_each_entry_rcu(ops, &mptcp_sched_list, list) {
+ if (!strcmp(ops->name, name))
+ return ops;
+ }
+
+ return NULL;
+}
+
+int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
+{
+ int ret = 0;
+
+ if (!sched->get_subflow)
+ return -EINVAL;
+
+ spin_lock(&mptcp_sched_list_lock);
+ if (mptcp_sched_find(sched->name)) {
+ pr_debug("%s already registered", sched->name);
+ ret = -EEXIST;
+ } else {
+ list_add_tail_rcu(&sched->list, &mptcp_sched_list);
+ pr_debug("%s registered", sched->name);
+ }
+ spin_unlock(&mptcp_sched_list_lock);
+
+ return ret;
+}
+
+void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched)
+{
+ spin_lock(&mptcp_sched_list_lock);
+ list_del_rcu(&sched->list);
+ spin_unlock(&mptcp_sched_list_lock);
+
+ synchronize_rcu();
+}
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH mptcp-next v2 2/8] mptcp: register default scheduler
2022-03-18 2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 1/8] mptcp: add struct mptcp_sched_ops Geliang Tang
@ 2022-03-18 2:58 ` Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 3/8] mptcp: add a new sysctl scheduler Geliang Tang
` (5 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18 2:58 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch defines the default packet scheduler mptcp_sched_default
and registers it in mptcp_sched_init().
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
net/mptcp/protocol.c | 3 ++-
net/mptcp/protocol.h | 2 ++
net/mptcp/sched.c | 11 +++++++++++
3 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index fbb14dfe62b3..11e19ac9d5af 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1424,7 +1424,7 @@ bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
* returns the subflow that will transmit the next DSS
* additionally updates the rtx timeout
*/
-static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
+struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
{
struct subflow_send_info send_info[SSK_MODE_MAX];
struct mptcp_subflow_context *subflow;
@@ -3743,6 +3743,7 @@ void __init mptcp_proto_init(void)
mptcp_subflow_init();
mptcp_pm_init();
+ mptcp_sched_init();
mptcp_token_init();
if (proto_register(&mptcp_prot, MPTCP_USE_SLAB) != 0)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 500dc7b3fde8..c186d199cf5b 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -608,6 +608,8 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct mptcp_sched_ops *mptcp_sched_find(const char *name);
int mptcp_register_scheduler(struct mptcp_sched_ops *sched);
void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched);
+struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
+void mptcp_sched_init(void);
static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 88ddc4aba4ea..9612e182b7b7 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -54,3 +54,14 @@ void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched)
synchronize_rcu();
}
+
+static struct mptcp_sched_ops mptcp_sched_default = {
+ .get_subflow = mptcp_subflow_get_send,
+ .name = "default",
+ .owner = THIS_MODULE,
+};
+
+void mptcp_sched_init(void)
+{
+ mptcp_register_scheduler(&mptcp_sched_default);
+}
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH mptcp-next v2 3/8] mptcp: add a new sysctl scheduler
2022-03-18 2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 1/8] mptcp: add struct mptcp_sched_ops Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 2/8] mptcp: register default scheduler Geliang Tang
@ 2022-03-18 2:58 ` Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 4/8] mptcp: add sched for struct mptcp_sock Geliang Tang
` (4 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18 2:58 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch adds a new sysctl, named scheduler, to support the selection
of different schedulers.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
Documentation/networking/mptcp-sysctl.rst | 8 ++++++++
net/mptcp/ctrl.c | 14 ++++++++++++++
net/mptcp/protocol.h | 1 +
3 files changed, 23 insertions(+)
diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst
index e263dfcc4b40..d9e69fdc7ea3 100644
--- a/Documentation/networking/mptcp-sysctl.rst
+++ b/Documentation/networking/mptcp-sysctl.rst
@@ -75,3 +75,11 @@ stale_loss_cnt - INTEGER
This is a per-namespace sysctl.
Default: 4
+
+scheduler - STRING
+ Select the scheduler of your choice.
+
+ Support for selection of different schedulers. This is a per-namespace
+ sysctl.
+
+ Default: "default"
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index ae20b7d92e28..c46c22a84d23 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -32,6 +32,7 @@ struct mptcp_pernet {
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
u8 pm_type;
+ char scheduler[MPTCP_SCHED_NAME_MAX];
};
static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -69,6 +70,11 @@ int mptcp_get_pm_type(const struct net *net)
return mptcp_get_pernet(net)->pm_type;
}
+const char *mptcp_get_scheduler(const struct net *net)
+{
+ return mptcp_get_pernet(net)->scheduler;
+}
+
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
@@ -77,6 +83,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
pernet->allow_join_initial_addr_port = 1;
pernet->stale_loss_cnt = 4;
pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
+ strcpy(pernet->scheduler, "default");
}
#ifdef CONFIG_SYSCTL
@@ -128,6 +135,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &mptcp_pm_type_max
},
+ {
+ .procname = "scheduler",
+ .maxlen = MPTCP_SCHED_NAME_MAX,
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
{}
};
@@ -149,6 +162,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[3].data = &pernet->allow_join_initial_addr_port;
table[4].data = &pernet->stale_loss_cnt;
table[5].data = &pernet->pm_type;
+ table[6].data = &pernet->scheduler;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index c186d199cf5b..f0bd2e17f2d6 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -583,6 +583,7 @@ int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
int mptcp_get_pm_type(const struct net *net);
+const char *mptcp_get_scheduler(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH mptcp-next v2 4/8] mptcp: add sched for struct mptcp_sock
2022-03-18 2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
` (2 preceding siblings ...)
2022-03-18 2:58 ` [PATCH mptcp-next v2 3/8] mptcp: add a new sysctl scheduler Geliang Tang
@ 2022-03-18 2:58 ` Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 5/8] mptcp: add mptcp_get_subflow wrapper Geliang Tang
` (3 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18 2:58 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch adds a new struct member sched to struct mptcp_sock. It is
set in mptcp_set_sched() to the scheduler named by the scheduler sysctl.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
net/mptcp/protocol.c | 3 +++
net/mptcp/protocol.h | 2 ++
net/mptcp/sched.c | 16 ++++++++++++++++
3 files changed, 21 insertions(+)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 11e19ac9d5af..b6b8f41a3ccb 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2609,6 +2609,7 @@ static int mptcp_init_sock(struct sock *sk)
* propagate the correct value
*/
mptcp_ca_reset(sk);
+ mptcp_set_sched(sk);
sk_sockets_allocated_inc(sk);
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
@@ -2760,6 +2761,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
sk_stop_timer(sk, &sk->sk_timer);
msk->pm.status = 0;
+ msk->sched = NULL;
/* clears msk->subflow, allowing the following loop to close
* even the initial subflow
@@ -2933,6 +2935,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->snd_una = msk->write_seq;
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
+ msk->sched = mptcp_sk(sk)->sched;
if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
msk->can_ack = true;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index f0bd2e17f2d6..d18c54e2f28c 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -286,6 +286,7 @@ struct mptcp_sock {
struct socket *subflow; /* outgoing connect/listener/!mp_capable */
struct sock *first;
struct mptcp_pm_data pm;
+ struct mptcp_sched_ops *sched;
struct {
u32 space; /* bytes copied in last measurement window */
u32 copied; /* bytes copied in this measurement window */
@@ -609,6 +610,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct mptcp_sched_ops *mptcp_sched_find(const char *name);
int mptcp_register_scheduler(struct mptcp_sched_ops *sched);
void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched);
+void mptcp_set_sched(struct sock *sk);
struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
void mptcp_sched_init(void);
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 9612e182b7b7..727d185af86e 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -55,6 +55,22 @@ void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched)
synchronize_rcu();
}
+void mptcp_set_sched(struct sock *sk)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ struct net *net = sock_net(sk);
+
+ msk->sched = mptcp_sched_find(mptcp_get_scheduler(net));
+ if (!msk->sched) {
+ pr_debug("sched %s not found", mptcp_get_scheduler(net));
+ return;
+ }
+
+ pr_debug("sched=%s", msk->sched->name);
+ if (msk->sched->init)
+ msk->sched->init(msk);
+}
+
static struct mptcp_sched_ops mptcp_sched_default = {
.get_subflow = mptcp_subflow_get_send,
.name = "default",
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH mptcp-next v2 5/8] mptcp: add mptcp_get_subflow wrapper
2022-03-18 2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
` (3 preceding siblings ...)
2022-03-18 2:58 ` [PATCH mptcp-next v2 4/8] mptcp: add sched for struct mptcp_sock Geliang Tang
@ 2022-03-18 2:58 ` Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 6/8] mptcp: add bpf_mptcp_sched_ops Geliang Tang
` (2 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18 2:58 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch defines a new wrapper mptcp_get_subflow() and uses it instead
of calling mptcp_subflow_get_send() directly.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
net/mptcp/protocol.c | 6 +++---
net/mptcp/protocol.h | 1 +
net/mptcp/sched.c | 6 ++++++
3 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b6b8f41a3ccb..5222849eacb1 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1567,7 +1567,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
int ret = 0;
prev_ssk = ssk;
- ssk = mptcp_subflow_get_send(msk);
+ ssk = mptcp_get_subflow(msk);
/* First check. If the ssk has changed since
* the last round, release prev_ssk
@@ -1634,7 +1634,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
* check for a different subflow usage only after
* spooling the first chunk of data
*/
- xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
+ xmit_ssk = first ? ssk : mptcp_get_subflow(mptcp_sk(sk));
if (!xmit_ssk)
goto out;
if (xmit_ssk != ssk) {
@@ -3073,7 +3073,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
return;
if (!sock_owned_by_user(sk)) {
- struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
+ struct sock *xmit_ssk = mptcp_get_subflow(mptcp_sk(sk));
if (xmit_ssk == ssk)
__mptcp_subflow_push_pending(sk, ssk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d18c54e2f28c..8871a17f8b82 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -613,6 +613,7 @@ void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched);
void mptcp_set_sched(struct sock *sk);
struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
void mptcp_sched_init(void);
+struct sock *mptcp_get_subflow(struct mptcp_sock *msk);
static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 727d185af86e..61cbb0237beb 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -81,3 +81,9 @@ void mptcp_sched_init(void)
{
mptcp_register_scheduler(&mptcp_sched_default);
}
+
+struct sock *mptcp_get_subflow(struct mptcp_sock *msk)
+{
+ return INDIRECT_CALL_INET_1(msk->sched->get_subflow,
+ mptcp_subflow_get_send, msk);
+}
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH mptcp-next v2 6/8] mptcp: add bpf_mptcp_sched_ops
2022-03-18 2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
` (4 preceding siblings ...)
2022-03-18 2:58 ` [PATCH mptcp-next v2 5/8] mptcp: add mptcp_get_subflow wrapper Geliang Tang
@ 2022-03-18 2:58 ` Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 7/8] selftests: bpf: add bpf_first scheduler Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 8/8] selftests: bpf: add bpf_first test Geliang Tang
7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18 2:58 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch implements a new struct bpf_struct_ops, bpf_mptcp_sched_ops.
The MPTCP scheduler is registered and unregistered in .reg and .unreg.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
kernel/bpf/bpf_struct_ops_types.h | 4 ++
net/mptcp/bpf.c | 102 ++++++++++++++++++++++++++++++
2 files changed, 106 insertions(+)
diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h
index 5678a9ddf817..5a6b0c0d8d3d 100644
--- a/kernel/bpf/bpf_struct_ops_types.h
+++ b/kernel/bpf/bpf_struct_ops_types.h
@@ -8,5 +8,9 @@ BPF_STRUCT_OPS_TYPE(bpf_dummy_ops)
#ifdef CONFIG_INET
#include <net/tcp.h>
BPF_STRUCT_OPS_TYPE(tcp_congestion_ops)
+#ifdef CONFIG_MPTCP
+#include <net/mptcp.h>
+BPF_STRUCT_OPS_TYPE(mptcp_sched_ops)
+#endif
#endif
#endif
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index da79dae559b2..5f7f9d30ecd4 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -8,8 +8,110 @@
*/
#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
#include "protocol.h"
+extern struct bpf_struct_ops bpf_mptcp_sched_ops;
+extern struct btf *btf_vmlinux;
+
+static u32 optional_ops[] = {
+ offsetof(struct mptcp_sched_ops, init),
+ offsetof(struct mptcp_sched_ops, release),
+ offsetof(struct mptcp_sched_ops, get_subflow),
+};
+
+static const struct bpf_func_proto *
+bpf_mptcp_sched_get_func_proto(enum bpf_func_id func_id,
+ const struct bpf_prog *prog)
+{
+ return bpf_base_func_proto(func_id);
+}
+
+static const struct bpf_verifier_ops bpf_mptcp_sched_verifier_ops = {
+ .get_func_proto = bpf_mptcp_sched_get_func_proto,
+ .is_valid_access = btf_ctx_access,
+ .btf_struct_access = btf_struct_access,
+};
+
+static int bpf_mptcp_sched_reg(void *kdata)
+{
+ return mptcp_register_scheduler(kdata);
+}
+
+static void bpf_mptcp_sched_unreg(void *kdata)
+{
+ mptcp_unregister_scheduler(kdata);
+}
+
+static int bpf_mptcp_sched_check_member(const struct btf_type *t,
+ const struct btf_member *member)
+{
+ return 0;
+}
+
+static bool is_optional(u32 member_offset)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(optional_ops); i++) {
+ if (member_offset == optional_ops[i])
+ return true;
+ }
+
+ return false;
+}
+
+static int bpf_mptcp_sched_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ const struct mptcp_sched_ops *usched;
+ struct mptcp_sched_ops *sched;
+ int prog_fd;
+ u32 moff;
+
+ usched = (const struct mptcp_sched_ops *)udata;
+ sched = (struct mptcp_sched_ops *)kdata;
+
+ moff = __btf_member_bit_offset(t, member) / 8;
+ switch (moff) {
+ case offsetof(struct mptcp_sched_ops, name):
+ if (bpf_obj_name_cpy(sched->name, usched->name,
+ sizeof(sched->name)) <= 0)
+ return -EINVAL;
+ if (mptcp_sched_find(usched->name))
+ return -EEXIST;
+ return 1;
+ }
+
+ if (!btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL))
+ return 0;
+
+ /* Ensure bpf_prog is provided for compulsory func ptr */
+ prog_fd = (int)(*(unsigned long *)(udata + moff));
+ if (!prog_fd && !is_optional(moff))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int bpf_mptcp_sched_init(struct btf *btf)
+{
+ return 0;
+}
+
+struct bpf_struct_ops bpf_mptcp_sched_ops = {
+ .verifier_ops = &bpf_mptcp_sched_verifier_ops,
+ .reg = bpf_mptcp_sched_reg,
+ .unreg = bpf_mptcp_sched_unreg,
+ .check_member = bpf_mptcp_sched_check_member,
+ .init_member = bpf_mptcp_sched_init_member,
+ .init = bpf_mptcp_sched_init,
+ .name = "mptcp_sched_ops",
+};
+
struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
{
if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk))
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH mptcp-next v2 7/8] selftests: bpf: add bpf_first scheduler
2022-03-18 2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
` (5 preceding siblings ...)
2022-03-18 2:58 ` [PATCH mptcp-next v2 6/8] mptcp: add bpf_mptcp_sched_ops Geliang Tang
@ 2022-03-18 2:58 ` Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 8/8] selftests: bpf: add bpf_first test Geliang Tang
7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18 2:58 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch implements the simplest MPTCP scheduler, named bpf_first,
which always picks the first subflow to send data.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
tools/testing/selftests/bpf/bpf_tcp_helpers.h | 12 ++++++++++
tools/testing/selftests/bpf/progs/bpf_first.c | 24 +++++++++++++++++++
2 files changed, 36 insertions(+)
create mode 100644 tools/testing/selftests/bpf/progs/bpf_first.c
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index f92357597e63..5bb21b24231f 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -197,6 +197,18 @@ struct tcp_congestion_ops {
void *owner;
};
+#define MPTCP_SCHED_NAME_MAX 16
+
+struct mptcp_sched_ops {
+ char name[MPTCP_SCHED_NAME_MAX];
+
+ void (*init)(struct mptcp_sock *msk);
+ void (*release)(struct mptcp_sock *msk);
+
+ struct sock * (*get_subflow)(struct mptcp_sock *msk);
+ void *owner;
+};
+
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min_not_zero(x, y) ({ \
diff --git a/tools/testing/selftests/bpf/progs/bpf_first.c b/tools/testing/selftests/bpf/progs/bpf_first.c
new file mode 100644
index 000000000000..ad6822d61741
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_first.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2022, SUSE. */
+
+#include <linux/bpf.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/mptcp_sched_first_init")
+void BPF_PROG(mptcp_sched_first_init, struct mptcp_sock *msk)
+{
+}
+
+struct sock *BPF_STRUCT_OPS(bpf_first_get_subflow, struct mptcp_sock *msk)
+{
+ return msk->first;
+}
+
+SEC(".struct_ops")
+struct mptcp_sched_ops first = {
+ .init = (void *)mptcp_sched_first_init,
+ .get_subflow = (void *)bpf_first_get_subflow,
+ .name = "bpf_first",
+};
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH mptcp-next v2 8/8] selftests: bpf: add bpf_first test
2022-03-18 2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
` (6 preceding siblings ...)
2022-03-18 2:58 ` [PATCH mptcp-next v2 7/8] selftests: bpf: add bpf_first scheduler Geliang Tang
@ 2022-03-18 2:58 ` Geliang Tang
7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18 2:58 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch adds the MPTCP scheduler test cases. The tests use sysctl to
set net.mptcp.scheduler.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
.../selftests/bpf/prog_tests/bpf_tcp_ca.c | 42 +++++++++++++++++--
1 file changed, 38 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 8f7a1cef7d87..979b6e90b372 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -9,6 +9,7 @@
#include "bpf_cubic.skel.h"
#include "bpf_tcp_nogpl.skel.h"
#include "bpf_dctcp_release.skel.h"
+#include "bpf_first.skel.h"
#define min(a, b) ((a) < (b) ? (a) : (b))
@@ -16,6 +17,10 @@
#define ENOTSUPP 524
#endif
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
static const unsigned int total_bytes = 10 * 1024 * 1024;
static int expected_stg = 0xeB9F;
static int stop, duration;
@@ -85,21 +90,26 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
socklen_t addrlen = sizeof(sa6);
void *thread_ret;
char batch[1500];
+ int proto = 0;
int err;
WRITE_ONCE(stop, 0);
- lfd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!strcmp(tcp_ca, "bpf_first"))
+ proto = IPPROTO_MPTCP;
+
+ lfd = socket(AF_INET6, SOCK_STREAM, proto);
if (CHECK(lfd == -1, "socket", "errno:%d\n", errno))
return;
- fd = socket(AF_INET6, SOCK_STREAM, 0);
+ fd = socket(AF_INET6, SOCK_STREAM, proto);
if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) {
close(lfd);
return;
}
- if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) ||
- settimeo(lfd, 0) || settimeo(fd, 0))
+ if (!proto &&
+ (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) ||
+ settimeo(lfd, 0) || settimeo(fd, 0)))
goto done;
/* bind, listen and start server thread to accept */
@@ -324,6 +334,28 @@ static void test_rel_setsockopt(void)
bpf_dctcp_release__destroy(rel_skel);
}
+static void test_first(void)
+{
+ struct bpf_first *first_skel;
+ struct bpf_link *link;
+
+ first_skel = bpf_first__open_and_load();
+ if (CHECK(!first_skel, "bpf_first__open_and_load", "failed\n"))
+ return;
+
+ link = bpf_map__attach_struct_ops(first_skel->maps.first);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
+ bpf_first__destroy(first_skel);
+ return;
+ }
+
+ system("sysctl -q net.mptcp.scheduler=bpf_first");
+ do_test("bpf_first", NULL);
+
+ bpf_link__destroy(link);
+ bpf_first__destroy(first_skel);
+}
+
void test_bpf_tcp_ca(void)
{
if (test__start_subtest("dctcp"))
@@ -336,4 +368,6 @@ void test_bpf_tcp_ca(void)
test_dctcp_fallback();
if (test__start_subtest("rel_setsockopt"))
test_rel_setsockopt();
+ if (test__start_subtest("first"))
+ test_first();
}
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
end of thread, other threads:[~2022-03-18 2:59 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-03-18 2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 1/8] mptcp: add struct mptcp_sched_ops Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 2/8] mptcp: register default scheduler Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 3/8] mptcp: add a new sysctl scheduler Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 4/8] mptcp: add sched for struct mptcp_sock Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 5/8] mptcp: add mptcp_get_subflow wrapper Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 6/8] mptcp: add bpf_mptcp_sched_ops Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 7/8] selftests: bpf: add bpf_first scheduler Geliang Tang
2022-03-18 2:58 ` [PATCH mptcp-next v2 8/8] selftests: bpf: add bpf_first test Geliang Tang
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.