From: Cong Wang <xiyou.wangcong@gmail.com>
To: netdev@vger.kernel.org
Cc: bpf@vger.kernel.org, john.fastabend@gmail.com,
jakub@cloudflare.com, zijianzhang@bytedance.com,
zhoufeng.zf@bytedance.com, Cong Wang <cong.wang@bytedance.com>
Subject: [Patch bpf-next v4 2/4] skmsg: implement slab allocator cache for sk_msg
Date: Mon, 30 Jun 2025 18:11:59 -0700 [thread overview]
Message-ID: <20250701011201.235392-3-xiyou.wangcong@gmail.com> (raw)
In-Reply-To: <20250701011201.235392-1-xiyou.wangcong@gmail.com>
From: Zijian Zhang <zijianzhang@bytedance.com>
Optimizing redirect ingress performance requires frequent allocation and
deallocation of sk_msg structures. Introduce a dedicated kmem_cache for
sk_msg to reduce memory allocation overhead and improve performance.
Acked-by: John Fastabend <john.fastabend@gmail.com>
Reviewed-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
---
include/linux/skmsg.h | 21 ++++++++++++---------
net/core/skmsg.c | 28 +++++++++++++++++++++-------
net/ipv4/tcp_bpf.c | 5 ++---
3 files changed, 35 insertions(+), 19 deletions(-)
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index d6f0a8cd73c4..bf28ce9b5fdb 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -121,6 +121,7 @@ struct sk_psock {
struct rcu_work rwork;
};
+struct sk_msg *sk_msg_alloc(gfp_t gfp);
int sk_msg_expand(struct sock *sk, struct sk_msg *msg, int len,
int elem_first_coalesce);
int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
@@ -143,6 +144,8 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
int len, int flags);
bool sk_msg_is_readable(struct sock *sk);
+extern struct kmem_cache *sk_msg_cachep;
+
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
{
WARN_ON(i == msg->sg.end && bytes);
@@ -319,6 +322,13 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
}
+static inline void kfree_sk_msg(struct sk_msg *msg)
+{
+ if (msg->skb)
+ consume_skb(msg->skb);
+ kmem_cache_free(sk_msg_cachep, msg);
+}
+
static inline bool sk_psock_queue_msg(struct sk_psock *psock,
struct sk_msg *msg)
{
@@ -330,7 +340,7 @@ static inline bool sk_psock_queue_msg(struct sk_psock *psock,
ret = true;
} else {
sk_msg_free(psock->sk, msg);
- kfree(msg);
+ kfree_sk_msg(msg);
ret = false;
}
spin_unlock_bh(&psock->ingress_lock);
@@ -378,13 +388,6 @@ static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
return psock ? list_empty(&psock->ingress_msg) : true;
}
-static inline void kfree_sk_msg(struct sk_msg *msg)
-{
- if (msg->skb)
- consume_skb(msg->skb);
- kfree(msg);
-}
-
static inline void sk_psock_report_error(struct sk_psock *psock, int err)
{
struct sock *sk = psock->sk;
@@ -441,7 +444,7 @@ static inline void sk_psock_cork_free(struct sk_psock *psock)
{
if (psock->cork) {
sk_msg_free(psock->sk, psock->cork);
- kfree(psock->cork);
+ kfree_sk_msg(psock->cork);
psock->cork = NULL;
}
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 0939356828a4..5aafa5817394 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -10,6 +10,8 @@
#include <net/tls.h>
#include <trace/events/sock.h>
+struct kmem_cache *sk_msg_cachep;
+
static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
{
if (msg->sg.end > msg->sg.start &&
@@ -503,16 +505,17 @@ bool sk_msg_is_readable(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_msg_is_readable);
-static struct sk_msg *alloc_sk_msg(gfp_t gfp)
+struct sk_msg *sk_msg_alloc(gfp_t gfp)
{
struct sk_msg *msg;
- msg = kzalloc(sizeof(*msg), gfp | __GFP_NOWARN);
+ msg = kmem_cache_zalloc(sk_msg_cachep, gfp | __GFP_NOWARN);
if (unlikely(!msg))
return NULL;
sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS);
return msg;
}
+EXPORT_SYMBOL_GPL(sk_msg_alloc);
static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
struct sk_buff *skb)
@@ -523,7 +526,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
if (!sk_rmem_schedule(sk, skb, skb->truesize))
return NULL;
- return alloc_sk_msg(GFP_KERNEL);
+ return sk_msg_alloc(GFP_KERNEL);
}
static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
@@ -598,7 +601,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
skb_set_owner_r(skb, sk);
err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, true);
if (err < 0)
- kfree(msg);
+ kfree_sk_msg(msg);
return err;
}
@@ -609,7 +612,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool take_ref)
{
- struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC);
+ struct sk_msg *msg = sk_msg_alloc(GFP_ATOMIC);
struct sock *sk = psock->sk;
int err;
@@ -618,7 +621,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
skb_set_owner_r(skb, sk);
err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, take_ref);
if (err < 0)
- kfree(msg);
+ kfree_sk_msg(msg);
return err;
}
@@ -795,7 +798,7 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
if (!msg->skb)
atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc);
sk_msg_free(psock->sk, msg);
- kfree(msg);
+ kfree_sk_msg(msg);
}
}
@@ -1280,3 +1283,14 @@ void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
sk->sk_data_ready = psock->saved_data_ready;
psock->saved_data_ready = NULL;
}
+
+/* SLAB_PANIC: sk_msg_alloc()/kfree_sk_msg() use sk_msg_cachep unchecked. */
+static int __init sk_msg_cachep_init(void)
+{
+	sk_msg_cachep = kmem_cache_create("sk_msg_cachep",
+					  sizeof(struct sk_msg), 0,
+					  SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
+					  SLAB_PANIC, NULL);
+	return 0;
+}
+late_initcall(sk_msg_cachep_init);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 85b64ffc20c6..f0ef41c951e2 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -38,7 +38,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
struct sk_msg *tmp;
int i, ret = 0;
- tmp = kzalloc(sizeof(*tmp), __GFP_NOWARN | GFP_KERNEL);
+ tmp = sk_msg_alloc(GFP_KERNEL);
if (unlikely(!tmp))
return -ENOMEM;
@@ -406,8 +406,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
msg->cork_bytes > msg->sg.size && !enospc) {
psock->cork_bytes = msg->cork_bytes - msg->sg.size;
if (!psock->cork) {
- psock->cork = kzalloc(sizeof(*psock->cork),
- GFP_ATOMIC | __GFP_NOWARN);
+ psock->cork = sk_msg_alloc(GFP_ATOMIC);
if (!psock->cork)
return -ENOMEM;
}
--
2.34.1
next prev parent reply other threads:[~2025-07-01 1:12 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-01 1:11 [Patch bpf-next v4 0/4] tcp_bpf: improve ingress redirection performance with message corking Cong Wang
2025-07-01 1:11 ` [Patch bpf-next v4 1/4] skmsg: rename sk_msg_alloc() to sk_msg_expand() Cong Wang
2025-07-02 9:24 ` Jakub Sitnicki
2025-07-01 1:11 ` Cong Wang [this message]
2025-07-02 11:36 ` [Patch bpf-next v4 2/4] skmsg: implement slab allocator cache for sk_msg Jakub Sitnicki
2025-07-01 1:12 ` [Patch bpf-next v4 3/4] skmsg: save some space in struct sk_psock Cong Wang
2025-07-02 11:46 ` Jakub Sitnicki
2025-07-01 1:12 ` [Patch bpf-next v4 4/4] tcp_bpf: improve ingress redirection performance with message corking Cong Wang
2025-07-02 12:17 ` Jakub Sitnicki
2025-07-03 2:17 ` Zijian Zhang
2025-07-03 11:32 ` Jakub Sitnicki
2025-07-04 4:20 ` Cong Wang
2025-07-07 17:51 ` Jakub Sitnicki
2025-07-15 0:26 ` Zijian Zhang
2025-07-02 10:22 ` [Patch bpf-next v4 0/4] " Jakub Sitnicki
2025-07-03 1:48 ` Zijian Zhang
2025-07-02 11:05 ` Jakub Sitnicki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250701011201.235392-3-xiyou.wangcong@gmail.com \
--to=xiyou.wangcong@gmail.com \
--cc=bpf@vger.kernel.org \
--cc=cong.wang@bytedance.com \
--cc=jakub@cloudflare.com \
--cc=john.fastabend@gmail.com \
--cc=netdev@vger.kernel.org \
--cc=zhoufeng.zf@bytedance.com \
--cc=zijianzhang@bytedance.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).