MPTCP Linux Development
 help / color / mirror / Atom feed
* [PATCH mptcp-next v2 0/8] BPF packet scheduler
@ 2022-03-18  2:58 Geliang Tang
  2022-03-18  2:58 ` [PATCH mptcp-next v2 1/8] mptcp: add struct mptcp_sched_ops Geliang Tang
                   ` (7 more replies)
  0 siblings, 8 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18  2:58 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

v2:
 - split into more small patches.
 - change all parameters of mptcp_sched_ops from sk to msk:
       void (*init)(struct mptcp_sock *msk);
       void (*release)(struct mptcp_sock *msk);
       struct sock *   (*get_subflow)(struct mptcp_sock *msk);
 - add tests in bpf_tcp_ca.c, instead of adding a new one.

Depends on the bpf patches queued in the patchwork:

c42b1c51caa4 selftests: bpf: add bpf_first test
49747d1d690e selftests: bpf: add bpf_first scheduler
857cc219d855 mptcp: add bpf_mptcp_sched_ops
287b994479ea mptcp: add mptcp_get_subflow wrapper
a626e160359a mptcp: add sched for struct mptcp_sock
7613acc6e70e mptcp: add a new sysctl scheduler
70ff4322bee0 mptcp: register default scheduler
50a302c839de mptcp: add struct mptcp_sched_ops
2b41b6b9a948 selftests: bpf: verify first subflow of mptcp_sock
91e52c32a832 selftests: bpf: verify ca_name of struct mptcp_sock
07a66e354126 Squash to "selftests: bpf: test bpf_skc_to_mptcp_sock"
168ee1d1a118 selftests: bpf: test bpf_skc_to_mptcp_sock
7a773e6af914 Squash to "selftests: bpf: add MPTCP test base"
a1933407b912 bpf: add bpf_skc_to_mptcp_sock_proto
0095d8ca9a67 Revert "bpf: add 'bpf_mptcp_sock' structure and helper"
2856d8c8020a Revert "selftests: bpf: add bpf_mptcp_sock() verifier tests"
df0e2a4c394f (origin/export, origin/HEAD) DO-NOT-MERGE: mptcp: enabled by default

v1:

Addressed the comments on the RFC version:
https://patchwork.kernel.org/project/mptcp/cover/cover.1631011068.git.geliangtang@xiaomi.com/

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/75

Geliang Tang (8):
  mptcp: add struct mptcp_sched_ops
  mptcp: register default scheduler
  mptcp: add a new sysctl scheduler
  mptcp: add sched for struct mptcp_sock
  mptcp: add mptcp_get_subflow wrapper
  mptcp: add bpf_mptcp_sched_ops
  selftests: bpf: add bpf_first scheduler
  selftests: bpf: add bpf_first test

 Documentation/networking/mptcp-sysctl.rst     |   8 ++
 include/net/mptcp.h                           |  13 +++
 kernel/bpf/bpf_struct_ops_types.h             |   4 +
 net/mptcp/Makefile                            |   2 +-
 net/mptcp/bpf.c                               | 102 ++++++++++++++++++
 net/mptcp/ctrl.c                              |  14 +++
 net/mptcp/protocol.c                          |  12 ++-
 net/mptcp/protocol.h                          |   9 ++
 net/mptcp/sched.c                             |  89 +++++++++++++++
 tools/testing/selftests/bpf/bpf_tcp_helpers.h |  12 +++
 .../selftests/bpf/prog_tests/bpf_tcp_ca.c     |  42 +++++++-
 tools/testing/selftests/bpf/progs/bpf_first.c |  24 +++++
 12 files changed, 322 insertions(+), 9 deletions(-)
 create mode 100644 net/mptcp/sched.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_first.c

-- 
2.34.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH mptcp-next v2 1/8] mptcp: add struct mptcp_sched_ops
  2022-03-18  2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
@ 2022-03-18  2:58 ` Geliang Tang
  2022-03-18  2:58 ` [PATCH mptcp-next v2 2/8] mptcp: register default scheduler Geliang Tang
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18  2:58 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch adds struct mptcp_sched_ops and defines the scheduler
register, unregister and find functions.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 include/net/mptcp.h  | 13 ++++++++++
 net/mptcp/Makefile   |  2 +-
 net/mptcp/protocol.h |  3 +++
 net/mptcp/sched.c    | 56 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100644 net/mptcp/sched.c

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 8b1afd6f5cc4..e3a0baa8dbd7 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -95,6 +95,19 @@ struct mptcp_out_options {
 #endif
 };
 
+#define MPTCP_SCHED_NAME_MAX 16
+
+struct mptcp_sched_ops {
+	struct sock *	(*get_subflow)(struct mptcp_sock *msk);
+
+	char			name[MPTCP_SCHED_NAME_MAX];
+	struct module		*owner;
+	struct list_head	list;
+
+	void (*init)(struct mptcp_sock *msk);
+	void (*release)(struct mptcp_sock *msk);
+} ____cacheline_aligned_in_smp;
+
 #ifdef CONFIG_MPTCP
 extern struct request_sock_ops mptcp_subflow_request_sock_ops;
 
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index 0a0608b6b4b4..aa5c10d1b80a 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -3,7 +3,7 @@ obj-$(CONFIG_MPTCP) += mptcp.o
 ccflags-y += -DDEBUG
 
 mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
-	   mib.o pm_netlink.o sockopt.o
+	   mib.o pm_netlink.o sockopt.o sched.o
 
 obj-$(CONFIG_SYN_COOKIES) += syncookies.o
 obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index c8bada4537e2..500dc7b3fde8 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -605,6 +605,9 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
 void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
 			 struct sockaddr_storage *addr,
 			 unsigned short family);
+struct mptcp_sched_ops *mptcp_sched_find(const char *name);
+int mptcp_register_scheduler(struct mptcp_sched_ops *sched);
+void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched);
 
 static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
 {
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
new file mode 100644
index 000000000000..88ddc4aba4ea
--- /dev/null
+++ b/net/mptcp/sched.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Multipath TCP
+ *
+ * Copyright (c) 2022, SUSE.
+ */
+
+#define pr_fmt(fmt) "MPTCP: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/indirect_call_wrapper.h>
+#include "protocol.h"
+
+static DEFINE_SPINLOCK(mptcp_sched_list_lock);
+static LIST_HEAD(mptcp_sched_list);
+
+struct mptcp_sched_ops *mptcp_sched_find(const char *name)
+{
+	struct mptcp_sched_ops *ops;
+
+	list_for_each_entry_rcu(ops, &mptcp_sched_list, list) {
+		if (!strcmp(ops->name, name))
+			return ops;
+	}
+
+	return NULL;
+}
+
+int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
+{
+	int ret = 0;
+
+	if (!sched->get_subflow)
+		return -EINVAL;
+
+	spin_lock(&mptcp_sched_list_lock);
+	if (mptcp_sched_find(sched->name)) {
+		pr_debug("%s already registered", sched->name);
+		ret = -EEXIST;
+	} else {
+		list_add_tail_rcu(&sched->list, &mptcp_sched_list);
+		pr_debug("%s registered", sched->name);
+	}
+	spin_unlock(&mptcp_sched_list_lock);
+
+	return ret;
+}
+
+void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched)
+{
+	spin_lock(&mptcp_sched_list_lock);
+	list_del_rcu(&sched->list);
+	spin_unlock(&mptcp_sched_list_lock);
+
+	synchronize_rcu();
+}
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH mptcp-next v2 2/8] mptcp: register default scheduler
  2022-03-18  2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
  2022-03-18  2:58 ` [PATCH mptcp-next v2 1/8] mptcp: add struct mptcp_sched_ops Geliang Tang
@ 2022-03-18  2:58 ` Geliang Tang
  2022-03-18  2:58 ` [PATCH mptcp-next v2 3/8] mptcp: add a new sysctl scheduler Geliang Tang
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18  2:58 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch defines the default packet scheduler, mptcp_sched_default, and
registers it in mptcp_sched_init().

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 net/mptcp/protocol.c |  3 ++-
 net/mptcp/protocol.h |  2 ++
 net/mptcp/sched.c    | 11 +++++++++++
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index fbb14dfe62b3..11e19ac9d5af 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1424,7 +1424,7 @@ bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
  * returns the subflow that will transmit the next DSS
  * additionally updates the rtx timeout
  */
-static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
+struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 {
 	struct subflow_send_info send_info[SSK_MODE_MAX];
 	struct mptcp_subflow_context *subflow;
@@ -3743,6 +3743,7 @@ void __init mptcp_proto_init(void)
 
 	mptcp_subflow_init();
 	mptcp_pm_init();
+	mptcp_sched_init();
 	mptcp_token_init();
 
 	if (proto_register(&mptcp_prot, MPTCP_USE_SLAB) != 0)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 500dc7b3fde8..c186d199cf5b 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -608,6 +608,8 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
 struct mptcp_sched_ops *mptcp_sched_find(const char *name);
 int mptcp_register_scheduler(struct mptcp_sched_ops *sched);
 void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched);
+struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
+void mptcp_sched_init(void);
 
 static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
 {
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 88ddc4aba4ea..9612e182b7b7 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -54,3 +54,14 @@ void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched)
 
 	synchronize_rcu();
 }
+
+static struct mptcp_sched_ops mptcp_sched_default = {
+	.get_subflow    = mptcp_subflow_get_send,
+	.name           = "default",
+	.owner          = THIS_MODULE,
+};
+
+void mptcp_sched_init(void)
+{
+	mptcp_register_scheduler(&mptcp_sched_default);
+}
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH mptcp-next v2 3/8] mptcp: add a new sysctl scheduler
  2022-03-18  2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
  2022-03-18  2:58 ` [PATCH mptcp-next v2 1/8] mptcp: add struct mptcp_sched_ops Geliang Tang
  2022-03-18  2:58 ` [PATCH mptcp-next v2 2/8] mptcp: register default scheduler Geliang Tang
@ 2022-03-18  2:58 ` Geliang Tang
  2022-03-18  2:58 ` [PATCH mptcp-next v2 4/8] mptcp: add sched for struct mptcp_sock Geliang Tang
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18  2:58 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch adds a new sysctl, named scheduler, to support the selection
of different schedulers.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 Documentation/networking/mptcp-sysctl.rst |  8 ++++++++
 net/mptcp/ctrl.c                          | 14 ++++++++++++++
 net/mptcp/protocol.h                      |  1 +
 3 files changed, 23 insertions(+)

diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst
index e263dfcc4b40..d9e69fdc7ea3 100644
--- a/Documentation/networking/mptcp-sysctl.rst
+++ b/Documentation/networking/mptcp-sysctl.rst
@@ -75,3 +75,11 @@ stale_loss_cnt - INTEGER
 	This is a per-namespace sysctl.
 
 	Default: 4
+
+scheduler - STRING
+	Select the scheduler of your choice.
+
+	Support for selection of different schedulers. This is a per-namespace
+	sysctl.
+
+	Default: "default"
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index ae20b7d92e28..c46c22a84d23 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -32,6 +32,7 @@ struct mptcp_pernet {
 	u8 checksum_enabled;
 	u8 allow_join_initial_addr_port;
 	u8 pm_type;
+	char scheduler[MPTCP_SCHED_NAME_MAX];
 };
 
 static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -69,6 +70,11 @@ int mptcp_get_pm_type(const struct net *net)
 	return mptcp_get_pernet(net)->pm_type;
 }
 
+const char *mptcp_get_scheduler(const struct net *net)
+{
+	return mptcp_get_pernet(net)->scheduler;
+}
+
 static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
 {
 	pernet->mptcp_enabled = 1;
@@ -77,6 +83,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
 	pernet->allow_join_initial_addr_port = 1;
 	pernet->stale_loss_cnt = 4;
 	pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
+	strcpy(pernet->scheduler, "default");
 }
 
 #ifdef CONFIG_SYSCTL
@@ -128,6 +135,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
 		.extra1       = SYSCTL_ZERO,
 		.extra2       = &mptcp_pm_type_max
 	},
+	{
+		.procname = "scheduler",
+		.maxlen	= MPTCP_SCHED_NAME_MAX,
+		.mode = 0644,
+		.proc_handler = proc_dostring,
+	},
 	{}
 };
 
@@ -149,6 +162,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
 	table[3].data = &pernet->allow_join_initial_addr_port;
 	table[4].data = &pernet->stale_loss_cnt;
 	table[5].data = &pernet->pm_type;
+	table[6].data = &pernet->scheduler;
 
 	hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
 	if (!hdr)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index c186d199cf5b..f0bd2e17f2d6 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -583,6 +583,7 @@ int mptcp_is_checksum_enabled(const struct net *net);
 int mptcp_allow_join_id0(const struct net *net);
 unsigned int mptcp_stale_loss_cnt(const struct net *net);
 int mptcp_get_pm_type(const struct net *net);
+const char *mptcp_get_scheduler(const struct net *net);
 void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
 				     struct mptcp_options_received *mp_opt);
 bool __mptcp_retransmit_pending_data(struct sock *sk);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH mptcp-next v2 4/8] mptcp: add sched for struct mptcp_sock
  2022-03-18  2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
                   ` (2 preceding siblings ...)
  2022-03-18  2:58 ` [PATCH mptcp-next v2 3/8] mptcp: add a new sysctl scheduler Geliang Tang
@ 2022-03-18  2:58 ` Geliang Tang
  2022-03-18  2:58 ` [PATCH mptcp-next v2 5/8] mptcp: add mptcp_get_subflow wrapper Geliang Tang
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18  2:58 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch adds a new struct member, sched, to struct mptcp_sock. It is set
in mptcp_set_sched() to the scheduler named by the scheduler sysctl.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 net/mptcp/protocol.c |  3 +++
 net/mptcp/protocol.h |  2 ++
 net/mptcp/sched.c    | 16 ++++++++++++++++
 3 files changed, 21 insertions(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 11e19ac9d5af..b6b8f41a3ccb 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2609,6 +2609,7 @@ static int mptcp_init_sock(struct sock *sk)
 	 * propagate the correct value
 	 */
 	mptcp_ca_reset(sk);
+	mptcp_set_sched(sk);
 
 	sk_sockets_allocated_inc(sk);
 	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
@@ -2760,6 +2761,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
 	sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
 	sk_stop_timer(sk, &sk->sk_timer);
 	msk->pm.status = 0;
+	msk->sched = NULL;
 
 	/* clears msk->subflow, allowing the following loop to close
 	 * even the initial subflow
@@ -2933,6 +2935,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
 	msk->snd_una = msk->write_seq;
 	msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
 	msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
+	msk->sched = mptcp_sk(sk)->sched;
 
 	if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
 		msk->can_ack = true;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index f0bd2e17f2d6..d18c54e2f28c 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -286,6 +286,7 @@ struct mptcp_sock {
 	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */
 	struct sock	*first;
 	struct mptcp_pm_data	pm;
+	struct mptcp_sched_ops	*sched;
 	struct {
 		u32	space;	/* bytes copied in last measurement window */
 		u32	copied; /* bytes copied in this measurement window */
@@ -609,6 +610,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
 struct mptcp_sched_ops *mptcp_sched_find(const char *name);
 int mptcp_register_scheduler(struct mptcp_sched_ops *sched);
 void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched);
+void mptcp_set_sched(struct sock *sk);
 struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
 void mptcp_sched_init(void);
 
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 9612e182b7b7..727d185af86e 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -55,6 +55,22 @@ void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched)
 	synchronize_rcu();
 }
 
+void mptcp_set_sched(struct sock *sk)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct net *net = sock_net(sk);
+
+	msk->sched = mptcp_sched_find(mptcp_get_scheduler(net));
+	if (!msk->sched) {
+		pr_debug("sched %s not found", mptcp_get_scheduler(net));
+		return;
+	}
+
+	pr_debug("sched=%s", msk->sched->name);
+	if (msk->sched->init)
+		msk->sched->init(msk);
+}
+
 static struct mptcp_sched_ops mptcp_sched_default = {
 	.get_subflow    = mptcp_subflow_get_send,
 	.name           = "default",
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH mptcp-next v2 5/8] mptcp: add mptcp_get_subflow wrapper
  2022-03-18  2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
                   ` (3 preceding siblings ...)
  2022-03-18  2:58 ` [PATCH mptcp-next v2 4/8] mptcp: add sched for struct mptcp_sock Geliang Tang
@ 2022-03-18  2:58 ` Geliang Tang
  2022-03-18  2:58 ` [PATCH mptcp-next v2 6/8] mptcp: add bpf_mptcp_sched_ops Geliang Tang
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18  2:58 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch defines a new wrapper, mptcp_get_subflow(), and uses it instead
of mptcp_subflow_get_send().

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 net/mptcp/protocol.c | 6 +++---
 net/mptcp/protocol.h | 1 +
 net/mptcp/sched.c    | 6 ++++++
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b6b8f41a3ccb..5222849eacb1 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1567,7 +1567,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 			int ret = 0;
 
 			prev_ssk = ssk;
-			ssk = mptcp_subflow_get_send(msk);
+			ssk = mptcp_get_subflow(msk);
 
 			/* First check. If the ssk has changed since
 			 * the last round, release prev_ssk
@@ -1634,7 +1634,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
 			 * check for a different subflow usage only after
 			 * spooling the first chunk of data
 			 */
-			xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
+			xmit_ssk = first ? ssk : mptcp_get_subflow(mptcp_sk(sk));
 			if (!xmit_ssk)
 				goto out;
 			if (xmit_ssk != ssk) {
@@ -3073,7 +3073,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
 		return;
 
 	if (!sock_owned_by_user(sk)) {
-		struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
+		struct sock *xmit_ssk = mptcp_get_subflow(mptcp_sk(sk));
 
 		if (xmit_ssk == ssk)
 			__mptcp_subflow_push_pending(sk, ssk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d18c54e2f28c..8871a17f8b82 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -613,6 +613,7 @@ void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched);
 void mptcp_set_sched(struct sock *sk);
 struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
 void mptcp_sched_init(void);
+struct sock *mptcp_get_subflow(struct mptcp_sock *msk);
 
 static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
 {
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 727d185af86e..61cbb0237beb 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -81,3 +81,9 @@ void mptcp_sched_init(void)
 {
 	mptcp_register_scheduler(&mptcp_sched_default);
 }
+
+struct sock *mptcp_get_subflow(struct mptcp_sock *msk)
+{
+	return INDIRECT_CALL_INET_1(msk->sched->get_subflow,
+				    mptcp_subflow_get_send, msk);
+}
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH mptcp-next v2 6/8] mptcp: add bpf_mptcp_sched_ops
  2022-03-18  2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
                   ` (4 preceding siblings ...)
  2022-03-18  2:58 ` [PATCH mptcp-next v2 5/8] mptcp: add mptcp_get_subflow wrapper Geliang Tang
@ 2022-03-18  2:58 ` Geliang Tang
  2022-03-18  2:58 ` [PATCH mptcp-next v2 7/8] selftests: bpf: add bpf_first scheduler Geliang Tang
  2022-03-18  2:58 ` [PATCH mptcp-next v2 8/8] selftests: bpf: add bpf_first test Geliang Tang
  7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18  2:58 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch implements a new struct bpf_struct_ops, bpf_mptcp_sched_ops,
which registers and unregisters the MPTCP scheduler in .reg and .unreg.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 kernel/bpf/bpf_struct_ops_types.h |   4 ++
 net/mptcp/bpf.c                   | 102 ++++++++++++++++++++++++++++++
 2 files changed, 106 insertions(+)

diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h
index 5678a9ddf817..5a6b0c0d8d3d 100644
--- a/kernel/bpf/bpf_struct_ops_types.h
+++ b/kernel/bpf/bpf_struct_ops_types.h
@@ -8,5 +8,9 @@ BPF_STRUCT_OPS_TYPE(bpf_dummy_ops)
 #ifdef CONFIG_INET
 #include <net/tcp.h>
 BPF_STRUCT_OPS_TYPE(tcp_congestion_ops)
+#ifdef CONFIG_MPTCP
+#include <net/mptcp.h>
+BPF_STRUCT_OPS_TYPE(mptcp_sched_ops)
+#endif
 #endif
 #endif
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index da79dae559b2..5f7f9d30ecd4 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -8,8 +8,110 @@
  */
 
 #include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
 #include "protocol.h"
 
+extern struct bpf_struct_ops bpf_mptcp_sched_ops;
+extern struct btf *btf_vmlinux;
+
+static u32 optional_ops[] = {
+	offsetof(struct mptcp_sched_ops, init),
+	offsetof(struct mptcp_sched_ops, release),
+	offsetof(struct mptcp_sched_ops, get_subflow),
+};
+
+static const struct bpf_func_proto *
+bpf_mptcp_sched_get_func_proto(enum bpf_func_id func_id,
+			       const struct bpf_prog *prog)
+{
+	return bpf_base_func_proto(func_id);
+}
+
+static const struct bpf_verifier_ops bpf_mptcp_sched_verifier_ops = {
+	.get_func_proto		= bpf_mptcp_sched_get_func_proto,
+	.is_valid_access	= btf_ctx_access,
+	.btf_struct_access	= btf_struct_access,
+};
+
+static int bpf_mptcp_sched_reg(void *kdata)
+{
+	return mptcp_register_scheduler(kdata);
+}
+
+static void bpf_mptcp_sched_unreg(void *kdata)
+{
+	mptcp_unregister_scheduler(kdata);
+}
+
+static int bpf_mptcp_sched_check_member(const struct btf_type *t,
+					const struct btf_member *member)
+{
+	return 0;
+}
+
+static bool is_optional(u32 member_offset)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(optional_ops); i++) {
+		if (member_offset == optional_ops[i])
+			return true;
+	}
+
+	return false;
+}
+
+static int bpf_mptcp_sched_init_member(const struct btf_type *t,
+				       const struct btf_member *member,
+				       void *kdata, const void *udata)
+{
+	const struct mptcp_sched_ops *usched;
+	struct mptcp_sched_ops *sched;
+	int prog_fd;
+	u32 moff;
+
+	usched = (const struct mptcp_sched_ops *)udata;
+	sched = (struct mptcp_sched_ops *)kdata;
+
+	moff = __btf_member_bit_offset(t, member) / 8;
+	switch (moff) {
+	case offsetof(struct mptcp_sched_ops, name):
+		if (bpf_obj_name_cpy(sched->name, usched->name,
+				     sizeof(sched->name)) <= 0)
+			return -EINVAL;
+		if (mptcp_sched_find(usched->name))
+			return -EEXIST;
+		return 1;
+	}
+
+	if (!btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL))
+		return 0;
+
+	/* Ensure bpf_prog is provided for compulsory func ptr */
+	prog_fd = (int)(*(unsigned long *)(udata + moff));
+	if (!prog_fd && !is_optional(moff))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int bpf_mptcp_sched_init(struct btf *btf)
+{
+	return 0;
+}
+
+struct bpf_struct_ops bpf_mptcp_sched_ops = {
+	.verifier_ops	= &bpf_mptcp_sched_verifier_ops,
+	.reg		= bpf_mptcp_sched_reg,
+	.unreg		= bpf_mptcp_sched_unreg,
+	.check_member	= bpf_mptcp_sched_check_member,
+	.init_member	= bpf_mptcp_sched_init_member,
+	.init		= bpf_mptcp_sched_init,
+	.name		= "mptcp_sched_ops",
+};
+
 struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
 {
 	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk))
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH mptcp-next v2 7/8] selftests: bpf: add bpf_first scheduler
  2022-03-18  2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
                   ` (5 preceding siblings ...)
  2022-03-18  2:58 ` [PATCH mptcp-next v2 6/8] mptcp: add bpf_mptcp_sched_ops Geliang Tang
@ 2022-03-18  2:58 ` Geliang Tang
  2022-03-18  2:58 ` [PATCH mptcp-next v2 8/8] selftests: bpf: add bpf_first test Geliang Tang
  7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18  2:58 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch implements the simplest MPTCP scheduler, named bpf_first,
which always picks the first subflow to send data.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 tools/testing/selftests/bpf/bpf_tcp_helpers.h | 12 ++++++++++
 tools/testing/selftests/bpf/progs/bpf_first.c | 24 +++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_first.c

diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index f92357597e63..5bb21b24231f 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -197,6 +197,18 @@ struct tcp_congestion_ops {
 	void *owner;
 };
 
+#define MPTCP_SCHED_NAME_MAX 16
+
+struct mptcp_sched_ops {
+	char name[MPTCP_SCHED_NAME_MAX];
+
+	void (*init)(struct mptcp_sock *msk);
+	void (*release)(struct mptcp_sock *msk);
+
+	struct sock *	(*get_subflow)(struct mptcp_sock *msk);
+	void *owner;
+};
+
 #define min(a, b) ((a) < (b) ? (a) : (b))
 #define max(a, b) ((a) > (b) ? (a) : (b))
 #define min_not_zero(x, y) ({			\
diff --git a/tools/testing/selftests/bpf/progs/bpf_first.c b/tools/testing/selftests/bpf/progs/bpf_first.c
new file mode 100644
index 000000000000..ad6822d61741
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_first.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2022, SUSE. */
+
+#include <linux/bpf.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/mptcp_sched_first_init")
+void BPF_PROG(mptcp_sched_first_init, struct mptcp_sock *msk)
+{
+}
+
+struct sock *BPF_STRUCT_OPS(bpf_first_get_subflow, struct mptcp_sock *msk)
+{
+	return msk->first;
+}
+
+SEC(".struct_ops")
+struct mptcp_sched_ops first = {
+	.init 		= (void *)mptcp_sched_first_init,
+	.get_subflow	= (void *)bpf_first_get_subflow,
+	.name		= "bpf_first",
+};
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH mptcp-next v2 8/8] selftests: bpf: add bpf_first test
  2022-03-18  2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
                   ` (6 preceding siblings ...)
  2022-03-18  2:58 ` [PATCH mptcp-next v2 7/8] selftests: bpf: add bpf_first scheduler Geliang Tang
@ 2022-03-18  2:58 ` Geliang Tang
  7 siblings, 0 replies; 9+ messages in thread
From: Geliang Tang @ 2022-03-18  2:58 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch adds the MPTCP scheduler test cases. The tests use sysctl to
set net.mptcp.scheduler.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 .../selftests/bpf/prog_tests/bpf_tcp_ca.c     | 42 +++++++++++++++++--
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 8f7a1cef7d87..979b6e90b372 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -9,6 +9,7 @@
 #include "bpf_cubic.skel.h"
 #include "bpf_tcp_nogpl.skel.h"
 #include "bpf_dctcp_release.skel.h"
+#include "bpf_first.skel.h"
 
 #define min(a, b) ((a) < (b) ? (a) : (b))
 
@@ -16,6 +17,10 @@
 #define ENOTSUPP 524
 #endif
 
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
 static const unsigned int total_bytes = 10 * 1024 * 1024;
 static int expected_stg = 0xeB9F;
 static int stop, duration;
@@ -85,21 +90,26 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
 	socklen_t addrlen = sizeof(sa6);
 	void *thread_ret;
 	char batch[1500];
+	int proto = 0;
 	int err;
 
 	WRITE_ONCE(stop, 0);
 
-	lfd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (!strcmp(tcp_ca, "bpf_first"))
+		proto = IPPROTO_MPTCP;
+
+	lfd = socket(AF_INET6, SOCK_STREAM, proto);
 	if (CHECK(lfd == -1, "socket", "errno:%d\n", errno))
 		return;
-	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	fd = socket(AF_INET6, SOCK_STREAM, proto);
 	if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) {
 		close(lfd);
 		return;
 	}
 
-	if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) ||
-	    settimeo(lfd, 0) || settimeo(fd, 0))
+	if (!proto &&
+	    (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) ||
+	     settimeo(lfd, 0) || settimeo(fd, 0)))
 		goto done;
 
 	/* bind, listen and start server thread to accept */
@@ -324,6 +334,28 @@ static void test_rel_setsockopt(void)
 	bpf_dctcp_release__destroy(rel_skel);
 }
 
+static void test_first(void)
+{
+	struct bpf_first *first_skel;
+	struct bpf_link *link;
+
+	first_skel = bpf_first__open_and_load();
+	if (CHECK(!first_skel, "bpf_first__open_and_load", "failed\n"))
+		return;
+
+	link = bpf_map__attach_struct_ops(first_skel->maps.first);
+	if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
+		bpf_first__destroy(first_skel);
+		return;
+	}
+
+	system("sysctl -q net.mptcp.scheduler=bpf_first");
+	do_test("bpf_first", NULL);
+
+	bpf_link__destroy(link);
+	bpf_first__destroy(first_skel);
+}
+
 void test_bpf_tcp_ca(void)
 {
 	if (test__start_subtest("dctcp"))
@@ -336,4 +368,6 @@ void test_bpf_tcp_ca(void)
 		test_dctcp_fallback();
 	if (test__start_subtest("rel_setsockopt"))
 		test_rel_setsockopt();
+	if (test__start_subtest("first"))
+		test_first();
 }
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-03-18  2:59 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-18  2:58 [PATCH mptcp-next v2 0/8] BPF packet scheduler Geliang Tang
2022-03-18  2:58 ` [PATCH mptcp-next v2 1/8] mptcp: add struct mptcp_sched_ops Geliang Tang
2022-03-18  2:58 ` [PATCH mptcp-next v2 2/8] mptcp: register default scheduler Geliang Tang
2022-03-18  2:58 ` [PATCH mptcp-next v2 3/8] mptcp: add a new sysctl scheduler Geliang Tang
2022-03-18  2:58 ` [PATCH mptcp-next v2 4/8] mptcp: add sched for struct mptcp_sock Geliang Tang
2022-03-18  2:58 ` [PATCH mptcp-next v2 5/8] mptcp: add mptcp_get_subflow wrapper Geliang Tang
2022-03-18  2:58 ` [PATCH mptcp-next v2 6/8] mptcp: add bpf_mptcp_sched_ops Geliang Tang
2022-03-18  2:58 ` [PATCH mptcp-next v2 7/8] selftests: bpf: add bpf_first scheduler Geliang Tang
2022-03-18  2:58 ` [PATCH mptcp-next v2 8/8] selftests: bpf: add bpf_first test Geliang Tang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox