public inbox for bpf@vger.kernel.org
 help / color / mirror / Atom feed
From: Kui-Feng Lee <kuifeng@meta.com>
To: <bpf@vger.kernel.org>, <ast@kernel.org>, <martin.lau@linux.dev>,
	<song@kernel.org>, <kernel-team@meta.com>, <andrii@kernel.org>,
	<sdf@google.com>
Cc: Kui-Feng Lee <kuifeng@meta.com>
Subject: [PATCH bpf-next v5 2/8] net: Update an existing TCP congestion control algorithm.
Date: Tue, 7 Mar 2023 16:50:44 -0800	[thread overview]
Message-ID: <20230308005050.255859-3-kuifeng@meta.com> (raw)
In-Reply-To: <20230308005050.255859-1-kuifeng@meta.com>

This feature lets you immediately transition to another congestion
control algorithm or implementation with the same name.  Once a name
is updated, new connections will apply this new algorithm.

The 'validate' function pointer has been added to bpf_struct_ops as
well, allowing us to validate a struct_ops without having to go
through the registration process.

Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
---
 include/linux/bpf.h            |  2 ++
 include/net/tcp.h              |  3 ++
 net/bpf/bpf_dummy_struct_ops.c |  6 ++++
 net/ipv4/bpf_tcp_ca.c          | 14 +++++++--
 net/ipv4/tcp_cong.c            | 57 +++++++++++++++++++++++++++++-----
 5 files changed, 73 insertions(+), 9 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 50cfc2388cbc..00b6e1a2edaf 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1512,6 +1512,8 @@ struct bpf_struct_ops {
 			   void *kdata, const void *udata);
 	int (*reg)(void *kdata);
 	void (*unreg)(void *kdata);
+	int (*update)(void *kdata, void *old_kdata);
+	int (*validate)(void *kdata);
 	const struct btf_type *type;
 	const struct btf_type *value_type;
 	const char *name;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index db9f828e9d1e..2abb755e6a3a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1117,6 +1117,9 @@ struct tcp_congestion_ops {
 
 int tcp_register_congestion_control(struct tcp_congestion_ops *type);
 void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
+int tcp_update_congestion_control(struct tcp_congestion_ops *type,
+				  struct tcp_congestion_ops *old_type);
+int tcp_validate_congestion_control(struct tcp_congestion_ops *ca);
 
 void tcp_assign_congestion_control(struct sock *sk);
 void tcp_init_congestion_control(struct sock *sk);
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index ff4f89a2b02a..158f14e240d0 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -222,12 +222,18 @@ static void bpf_dummy_unreg(void *kdata)
 {
 }
 
+static int bpf_dummy_update(void *kdata, void *old_kdata)
+{
+	return -EOPNOTSUPP;
+}
+
 struct bpf_struct_ops bpf_bpf_dummy_ops = {
 	.verifier_ops = &bpf_dummy_verifier_ops,
 	.init = bpf_dummy_init,
 	.check_member = bpf_dummy_ops_check_member,
 	.init_member = bpf_dummy_init_member,
 	.reg = bpf_dummy_reg,
+	.update = bpf_dummy_update,
 	.unreg = bpf_dummy_unreg,
 	.name = "bpf_dummy_ops",
 };
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 13fc0c185cd9..e8b27826283e 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -239,8 +239,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
 		if (bpf_obj_name_cpy(tcp_ca->name, utcp_ca->name,
 				     sizeof(tcp_ca->name)) <= 0)
 			return -EINVAL;
-		if (tcp_ca_find(utcp_ca->name))
-			return -EEXIST;
 		return 1;
 	}
 
@@ -266,13 +264,25 @@ static void bpf_tcp_ca_unreg(void *kdata)
 	tcp_unregister_congestion_control(kdata);
 }
 
+static int bpf_tcp_ca_update(void *kdata, void *old_kdata)
+{
+	return tcp_update_congestion_control(kdata, old_kdata);
+}
+
+static int bpf_tcp_ca_validate(void *kdata)
+{
+	return tcp_validate_congestion_control(kdata);
+}
+
 struct bpf_struct_ops bpf_tcp_congestion_ops = {
 	.verifier_ops = &bpf_tcp_ca_verifier_ops,
 	.reg = bpf_tcp_ca_reg,
 	.unreg = bpf_tcp_ca_unreg,
+	.update = bpf_tcp_ca_update,
 	.check_member = bpf_tcp_ca_check_member,
 	.init_member = bpf_tcp_ca_init_member,
 	.init = bpf_tcp_ca_init,
+	.validate = bpf_tcp_ca_validate,
 	.name = "tcp_congestion_ops",
 };
 
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index db8b4b488c31..24829390e495 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -75,14 +75,8 @@ struct tcp_congestion_ops *tcp_ca_find_key(u32 key)
 	return NULL;
 }
 
-/*
- * Attach new congestion control algorithm to the list
- * of available options.
- */
-int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
+int tcp_validate_congestion_control(struct tcp_congestion_ops *ca)
 {
-	int ret = 0;
-
 	/* all algorithms must implement these */
 	if (!ca->ssthresh || !ca->undo_cwnd ||
 	    !(ca->cong_avoid || ca->cong_control)) {
@@ -90,6 +84,20 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
 		return -EINVAL;
 	}
 
+	return 0;
+}
+
+/* Attach new congestion control algorithm to the list
+ * of available options.
+ */
+int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
+{
+	int ret;
+
+	ret = tcp_validate_congestion_control(ca);
+	if (ret)
+		return ret;
+
 	ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
 
 	spin_lock(&tcp_cong_list_lock);
@@ -130,6 +138,41 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
 }
 EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
 
+/* Replace a registered old ca with a new one.
+ *
+ * The new ca must have the same name as the old one, that has been
+ * registered.
+ */
+int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca)
+{
+	struct tcp_congestion_ops *existing;
+	int ret;
+
+	ret = tcp_validate_congestion_control(ca);
+	if (ret)
+		return ret;
+
+	ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
+
+	spin_lock(&tcp_cong_list_lock);
+	existing = tcp_ca_find_key(old_ca->key);
+	if (ca->key == TCP_CA_UNSPEC || !existing || strcmp(existing->name, ca->name)) {
+		pr_notice("%s not registered or non-unique key\n",
+			  ca->name);
+		ret = -EINVAL;
+	} else if (existing != old_ca) {
+		pr_notice("invalid old congestion control algorithm to replace\n");
+		ret = -EINVAL;
+	} else {
+		list_add_tail_rcu(&ca->list, &tcp_cong_list);
+		list_del_rcu(&existing->list);
+		pr_debug("%s updated\n", ca->name);
+	}
+	spin_unlock(&tcp_cong_list_lock);
+
+	return ret;
+}
+
 u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
 {
 	const struct tcp_congestion_ops *ca;
-- 
2.34.1


  parent reply	other threads:[~2023-03-08  0:51 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-08  0:50 [PATCH bpf-next v5 0/8] Transit between BPF TCP congestion controls Kui-Feng Lee
2023-03-08  0:50 ` [PATCH bpf-next v5 1/8] bpf: Retire the struct_ops map kvalue->refcnt Kui-Feng Lee
2023-03-08 18:30   ` Martin KaFai Lau
2023-03-08 22:30     ` Kui-Feng Lee
2023-03-08  0:50 ` Kui-Feng Lee [this message]
2023-03-08 18:59   ` [PATCH bpf-next v5 2/8] net: Update an existing TCP congestion control algorithm Martin KaFai Lau
2023-03-08 22:51     ` Kui-Feng Lee
2023-03-08  0:50 ` [PATCH bpf-next v5 3/8] bpf: Create links for BPF struct_ops maps Kui-Feng Lee
2023-03-08 15:01   ` kernel test robot
2023-03-08 15:32   ` kernel test robot
2023-03-08 16:03   ` kernel test robot
2023-03-08 20:04   ` Martin KaFai Lau
2023-03-08 23:46     ` Kui-Feng Lee
2023-03-08  0:50 ` [PATCH bpf-next v5 4/8] libbpf: Create a bpf_link in bpf_map__attach_struct_ops() Kui-Feng Lee
2023-03-08 21:42   ` Martin KaFai Lau
2023-03-09  0:22     ` Kui-Feng Lee
2023-03-09 17:09       ` Martin KaFai Lau
2023-03-09 18:16         ` Kui-Feng Lee
2023-03-09 18:19           ` Kui-Feng Lee
2023-03-08  0:50 ` [PATCH bpf-next v5 5/8] bpf: Update the struct_ops of a bpf_link Kui-Feng Lee
2023-03-08 22:21   ` Martin KaFai Lau
2023-03-09  3:09     ` Kui-Feng Lee
2023-03-08  0:50 ` [PATCH bpf-next v5 6/8] libbpf: Update a bpf_link with another struct_ops Kui-Feng Lee
2023-03-08 22:32   ` Martin KaFai Lau
2023-03-08  0:50 ` [PATCH bpf-next v5 7/8] libbpf: Use .struct_ops.link section to indicate a struct_ops with a link Kui-Feng Lee
2023-03-08  0:50 ` [PATCH bpf-next v5 8/8] selftests/bpf: Test switching TCP Congestion Control algorithms Kui-Feng Lee
2023-03-08 23:10   ` Martin KaFai Lau
2023-03-09  3:34     ` Kui-Feng Lee

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230308005050.255859-3-kuifeng@meta.com \
    --to=kuifeng@meta.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=kernel-team@meta.com \
    --cc=martin.lau@linux.dev \
    --cc=sdf@google.com \
    --cc=song@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox