From: Jiayuan Chen <jiayuan.chen@linux.dev>
To: bpf@vger.kernel.org
Cc: mrpre@163.com, Jiayuan Chen <jiayuan.chen@linux.dev>,
Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Andrii Nakryiko <andrii@kernel.org>,
Martin KaFai Lau <martin.lau@linux.dev>,
Eduard Zingerman <eddyz87@gmail.com>, Song Liu <song@kernel.org>,
Yonghong Song <yonghong.song@linux.dev>,
John Fastabend <john.fastabend@gmail.com>,
KP Singh <kpsingh@kernel.org>,
Stanislav Fomichev <sdf@fomichev.me>, Hao Luo <haoluo@google.com>,
Jiri Olsa <jolsa@kernel.org>, Jonathan Corbet <corbet@lwn.net>,
Jakub Sitnicki <jakub@cloudflare.com>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Simon Horman <horms@kernel.org>,
Kuniyuki Iwashima <kuniyu@amazon.com>,
Willem de Bruijn <willemb@google.com>,
Mykola Lysenko <mykolal@fb.com>, Shuah Khan <shuah@kernel.org>,
Jiapeng Chong <jiapeng.chong@linux.alibaba.com>,
linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
netdev@vger.kernel.org, linux-kselftest@vger.kernel.org
Subject: [PATCH bpf-next v1 1/3] bpf, sockmap: Introduce a new kfunc for sockmap
Date: Mon, 28 Apr 2025 16:16:52 +0800 [thread overview]
Message-ID: <20250428081744.52375-2-jiayuan.chen@linux.dev> (raw)
In-Reply-To: <20250428081744.52375-1-jiayuan.chen@linux.dev>
Since the helper list is effectively frozen and the existing helpers
cannot be extended, add a new kfunc, bpf_sk_skb_set_redirect_cpu(),
instead.
The new kfunc simply stores the requested redir_cpu in the psock.
All these changes conform to the kfuncs.rst documentation.
Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
---
Documentation/bpf/map_sockmap.rst | 14 +++++++++++
include/linux/skmsg.h | 3 +++
kernel/bpf/btf.c | 3 +++
net/core/skmsg.c | 1 +
net/core/sock_map.c | 39 +++++++++++++++++++++++++++++++
5 files changed, 60 insertions(+)
diff --git a/Documentation/bpf/map_sockmap.rst b/Documentation/bpf/map_sockmap.rst
index 2d630686a00b..eca3dfc1c85f 100644
--- a/Documentation/bpf/map_sockmap.rst
+++ b/Documentation/bpf/map_sockmap.rst
@@ -212,6 +212,20 @@ following cases:
Returns 0
+bpf_sk_skb_set_redirect_cpu()
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+ int bpf_sk_skb_set_redirect_cpu(struct __sk_buff *s, int redir_cpu)
+
+This kfunc ``bpf_sk_skb_set_redirect_cpu()`` is available to
+``BPF_PROG_TYPE_SK_SKB`` BPF programs. It sets the preferred CPU for the
+sockmap packet redirection work, so that redirection runs on the specified CPU
+whenever possible. This helps users reduce interference between the sockmap
+redirection background thread and other threads.
+
+Returns 0 on success, or a negative error in case of failure.
+
bpf_msg_cork_bytes()
^^^^^^^^^^^^^^^^^^^^^^
.. code-block:: c
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 0b9095a281b8..b888481a845d 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -16,6 +16,8 @@
#define MAX_MSG_FRAGS MAX_SKB_FRAGS
#define NR_MSG_FRAG_IDS (MAX_MSG_FRAGS + 1)
+#define BPF_SK_REDIR_CPU_UNSET -1
+
enum __sk_action {
__SK_DROP = 0,
__SK_PASS,
@@ -86,6 +88,7 @@ struct sk_psock {
u32 apply_bytes;
u32 cork_bytes;
u32 eval;
+ s32 redir_cpu;
bool redir_ingress; /* undefined if sk_redir is null */
struct sk_msg *cork;
struct sk_psock_progs progs;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index a91822bae043..2a8f59e2c639 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -219,6 +219,7 @@ enum btf_kfunc_hook {
BTF_KFUNC_HOOK_LWT,
BTF_KFUNC_HOOK_NETFILTER,
BTF_KFUNC_HOOK_KPROBE,
+ BTF_KFUNC_HOOK_SK_MSG,
BTF_KFUNC_HOOK_MAX,
};
@@ -8649,6 +8650,8 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
return BTF_KFUNC_HOOK_SCHED_ACT;
case BPF_PROG_TYPE_SK_SKB:
return BTF_KFUNC_HOOK_SK_SKB;
+ case BPF_PROG_TYPE_SK_MSG:
+ return BTF_KFUNC_HOOK_SK_MSG;
case BPF_PROG_TYPE_SOCKET_FILTER:
return BTF_KFUNC_HOOK_SOCKET_FILTER;
case BPF_PROG_TYPE_LWT_OUT:
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 276934673066..292752c783b5 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -741,6 +741,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
psock->saved_destroy = prot->destroy;
psock->saved_close = prot->close;
psock->saved_write_space = sk->sk_write_space;
+ psock->redir_cpu = BPF_SK_REDIR_CPU_UNSET;
INIT_LIST_HEAD(&psock->link);
spin_lock_init(&psock->link_lock);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 82a14f131d00..9f1e531a3807 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -701,6 +701,45 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = {
.arg4_type = ARG_ANYTHING,
};
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc int bpf_sk_skb_set_redirect_cpu(struct __sk_buff *s, int redir_cpu)
+{
+ struct sk_buff *skb = (struct sk_buff *)s;
+ struct sock *sk = skb->sk;
+ struct sk_psock *psock;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ /* CPU ids can be sparse, so validate the id itself rather than the count. */
+ if (!sk || redir_cpu < 0 || redir_cpu >= nr_cpu_ids || !cpu_possible(redir_cpu))
+ return -EINVAL;
+
+ psock = sk_psock(sk);
+ if (!psock)
+ return -ENOENT;
+
+ psock->redir_cpu = redir_cpu;
+ return 0;
+}
+
+__bpf_kfunc_end_defs();
+
+BTF_KFUNCS_START(bpf_sk_sockmap_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_sk_skb_set_redirect_cpu)
+BTF_KFUNCS_END(bpf_sk_sockmap_kfunc_ids)
+
+static const struct btf_kfunc_id_set bpf_sk_sockmap_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_sk_sockmap_kfunc_ids,
+};
+
+static int __init init_sockmap_subsystem(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_SK_SKB, &bpf_sk_sockmap_kfunc_set);
+}
+
+late_initcall(init_sockmap_subsystem);
+
struct sock_map_seq_info {
struct bpf_map *map;
struct sock *sk;
--
2.47.1
next prev parent reply other threads:[~2025-04-28 8:18 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-28 8:16 [PATCH bpf-next v1 0/3] bpf, sockmap: Improve performance with CPU affinity Jiayuan Chen
2025-04-28 8:16 ` Jiayuan Chen [this message]
2025-04-29 0:56 ` [PATCH bpf-next v1 1/3] bpf, sockmap: Introduce a new kfunc for sockmap Cong Wang
2025-04-29 5:23 ` Jiayuan Chen
2025-04-28 8:16 ` [PATCH bpf-next v1 2/3] bpf, sockmap: Affinitize workqueue to a specific CPU Jiayuan Chen
2025-04-28 8:16 ` [PATCH bpf-next v1 3/3] selftest/bpf/benchs: Add cpu-affinity for sockmap bench Jiayuan Chen
2025-04-29 23:26 ` [PATCH bpf-next v1 0/3] bpf, sockmap: Improve performance with CPU affinity Alexei Starovoitov
2025-04-29 23:47 ` Jiayuan Chen
2025-04-29 23:53 ` Alexei Starovoitov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250428081744.52375-2-jiayuan.chen@linux.dev \
--to=jiayuan.chen@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=corbet@lwn.net \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=eddyz87@gmail.com \
--cc=edumazet@google.com \
--cc=haoluo@google.com \
--cc=horms@kernel.org \
--cc=jakub@cloudflare.com \
--cc=jiapeng.chong@linux.alibaba.com \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=kuba@kernel.org \
--cc=kuniyu@amazon.com \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=martin.lau@linux.dev \
--cc=mrpre@163.com \
--cc=mykolal@fb.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sdf@fomichev.me \
--cc=shuah@kernel.org \
--cc=song@kernel.org \
--cc=willemb@google.com \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.