linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] skbuff: Improve the sending efficiency of __skb_send_sock
@ 2025-06-27  9:44 Feng Yang
  2025-06-27 10:19 ` Eric Dumazet
  0 siblings, 1 reply; 5+ messages in thread
From: Feng Yang @ 2025-06-27  9:44 UTC (permalink / raw)
  To: davem, edumazet, kuba, pabeni, horms, willemb, almasrymina,
	kerneljasonxing, ebiggers, asml.silence, aleksander.lobakin,
	stfomichev
  Cc: yangfeng, netdev, linux-kernel

From: Feng Yang <yangfeng@kylinos.cn>

By aggregating skb data into a bvec array for transmission, when using sockmap to forward large packets,
what previously required multiple transmissions now only needs a single transmission, which significantly enhances performance.
For small packets, the performance remains comparable to the original level.

When using sockmap for forwarding, the average latency for different packet sizes
after sending 10,000 packets is as follows:
size	old(us)		new(us)
512	56		55
1472	58		58
1600	106		79
3000	145		108
5000	182		123

Signed-off-by: Feng Yang <yangfeng@kylinos.cn>
---
Changes in v2:
- Delete dynamic memory allocation, thanks: Paolo Abeni, Stanislav Fomichev.
- Link to v1: https://lore.kernel.org/all/20250623084212.122284-1-yangfeng59949@163.com/
---
 net/core/skbuff.c | 145 ++++++++++++++++++++++------------------------
 1 file changed, 68 insertions(+), 77 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 85fc82f72d26..aae5139cfb28 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3231,104 +3231,95 @@ static int sendmsg_unlocked(struct sock *sk, struct msghdr *msg)
 	return sock_sendmsg(sock, msg);
 }
 
+#define MAX_SKB_SEND_BIOVEC_SIZE	16
 typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg);
 static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset,
 			   int len, sendmsg_func sendmsg, int flags)
 {
-	unsigned int orig_len = len;
 	struct sk_buff *head = skb;
 	unsigned short fragidx;
-	int slen, ret;
-
-do_frag_list:
-
-	/* Deal with head data */
-	while (offset < skb_headlen(skb) && len) {
-		struct kvec kv;
-		struct msghdr msg;
-
-		slen = min_t(int, len, skb_headlen(skb) - offset);
-		kv.iov_base = skb->data + offset;
-		kv.iov_len = slen;
-		memset(&msg, 0, sizeof(msg));
-		msg.msg_flags = MSG_DONTWAIT | flags;
-
-		iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen);
-		ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
-				      sendmsg_unlocked, sk, &msg);
-		if (ret <= 0)
-			goto error;
-
-		offset += ret;
-		len -= ret;
-	}
-
-	/* All the data was skb head? */
-	if (!len)
-		goto out;
+	struct msghdr msg;
+	struct bio_vec bvec[MAX_SKB_SEND_BIOVEC_SIZE];
+	int ret, slen, total_len = 0;
+	int bvec_count = 0;
+	unsigned int copied = 0;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT | flags;
+
+	while (copied < len) {
+		/* Deal with head data */
+		if (offset < skb_headlen(skb) && bvec_count < MAX_SKB_SEND_BIOVEC_SIZE) {
+			struct page *page = virt_to_page(skb->data + offset);
+			unsigned int page_offset = offset_in_page(skb->data + offset);
+
+			if (!sendpage_ok(page))
+				msg.msg_flags &= ~MSG_SPLICE_PAGES;
+
+			slen = min_t(int, skb_headlen(skb) - offset, len - copied);
+			bvec_set_page(&bvec[bvec_count++], page, slen, page_offset);
+			copied += slen;
+			offset += slen;
+		}
 
-	/* Make offset relative to start of frags */
-	offset -= skb_headlen(skb);
+		/* Make offset relative to start of frags */
+		offset -= skb_headlen(skb);
 
-	/* Find where we are in frag list */
-	for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
-		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
+		if (copied < len && bvec_count < MAX_SKB_SEND_BIOVEC_SIZE) {
+			for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
+				skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
+				unsigned int frag_size = skb_frag_size(frag);
 
-		if (offset < skb_frag_size(frag))
-			break;
+				/* Find where we are in frag list */
+				if (offset >= frag_size) {
+					offset -= frag_size;
+					continue;
+				}
 
-		offset -= skb_frag_size(frag);
-	}
+				slen = min_t(size_t, frag_size - offset, len - copied);
+				bvec_set_page(&bvec[bvec_count++], skb_frag_page(frag), slen,
+					      skb_frag_off(frag) + offset);
 
-	for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
-		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
+				copied += slen;
+				offset = 0;
 
-		slen = min_t(size_t, len, skb_frag_size(frag) - offset);
+				if (copied >= len || bvec_count >= MAX_SKB_SEND_BIOVEC_SIZE)
+					break;
+			}
+		}
 
-		while (slen) {
-			struct bio_vec bvec;
-			struct msghdr msg = {
-				.msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT |
-					     flags,
-			};
+		if (copied < len && bvec_count < MAX_SKB_SEND_BIOVEC_SIZE) {
+			/* Process any frag lists */
+			if (skb == head) {
+				if (skb_has_frag_list(skb))
+					skb = skb_shinfo(skb)->frag_list;
+			} else if (skb->next) {
+				skb = skb->next;
+			}
+		}
 
-			bvec_set_page(&bvec, skb_frag_page(frag), slen,
-				      skb_frag_off(frag) + offset);
-			iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1,
-				      slen);
+		if (bvec_count == MAX_SKB_SEND_BIOVEC_SIZE || copied == len) {
+			iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, bvec_count, len);
+			ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked, sendmsg_unlocked, sk, &msg);
 
-			ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
-					      sendmsg_unlocked, sk, &msg);
-			if (ret <= 0)
-				goto error;
+			if (ret < 0)
+				return ret;
 
+			/* Statistical data */
 			len -= ret;
 			offset += ret;
-			slen -= ret;
+			total_len += ret;
+
+			/* Restore initial value */
+			memset(&msg, 0, sizeof(msg));
+			msg.msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT | flags;
+			copied = 0;
+			bvec_count = 0;
+			skb = head;
 		}
-
-		offset = 0;
 	}
 
-	if (len) {
-		/* Process any frag lists */
-
-		if (skb == head) {
-			if (skb_has_frag_list(skb)) {
-				skb = skb_shinfo(skb)->frag_list;
-				goto do_frag_list;
-			}
-		} else if (skb->next) {
-			skb = skb->next;
-			goto do_frag_list;
-		}
-	}
-
-out:
-	return orig_len - len;
-
-error:
-	return orig_len == len ? ret : orig_len - len;
+	return total_len;
 }
 
 /* Send skb data on a socket. Socket must be locked. */
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] skbuff: Improve the sending efficiency of __skb_send_sock
  2025-06-27  9:44 [PATCH v2] skbuff: Improve the sending efficiency of __skb_send_sock Feng Yang
@ 2025-06-27 10:19 ` Eric Dumazet
  2025-06-27 10:23   ` Eric Dumazet
  2025-06-29 11:33   ` David Laight
  0 siblings, 2 replies; 5+ messages in thread
From: Eric Dumazet @ 2025-06-27 10:19 UTC (permalink / raw)
  To: Feng Yang
  Cc: davem, kuba, pabeni, horms, willemb, almasrymina, kerneljasonxing,
	ebiggers, asml.silence, aleksander.lobakin, stfomichev, yangfeng,
	netdev, linux-kernel

On Fri, Jun 27, 2025 at 2:44 AM Feng Yang <yangfeng59949@163.com> wrote:
>
> From: Feng Yang <yangfeng@kylinos.cn>
>
> By aggregating skb data into a bvec array for transmission, when using sockmap to forward large packets,
> what previously required multiple transmissions now only needs a single transmission, which significantly enhances performance.
> For small packets, the performance remains comparable to the original level.
>
> When using sockmap for forwarding, the average latency for different packet sizes
> after sending 10,000 packets is as follows:
> size    old(us)         new(us)
> 512     56              55
> 1472    58              58
> 1600    106             79
> 3000    145             108
> 5000    182             123
>
> Signed-off-by: Feng Yang <yangfeng@kylinos.cn>

Instead of changing everything, have you tried strategically adding
MSG_MORE in this function ?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] skbuff: Improve the sending efficiency of __skb_send_sock
  2025-06-27 10:19 ` Eric Dumazet
@ 2025-06-27 10:23   ` Eric Dumazet
  2025-06-29 11:33   ` David Laight
  1 sibling, 0 replies; 5+ messages in thread
From: Eric Dumazet @ 2025-06-27 10:23 UTC (permalink / raw)
  To: Feng Yang
  Cc: davem, kuba, pabeni, horms, willemb, almasrymina, kerneljasonxing,
	ebiggers, asml.silence, aleksander.lobakin, stfomichev, yangfeng,
	netdev, linux-kernel

On Fri, Jun 27, 2025 at 3:19 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Fri, Jun 27, 2025 at 2:44 AM Feng Yang <yangfeng59949@163.com> wrote:
> >
> > From: Feng Yang <yangfeng@kylinos.cn>
> >
> > By aggregating skb data into a bvec array for transmission, when using sockmap to forward large packets,
> > what previously required multiple transmissions now only needs a single transmission, which significantly enhances performance.
> > For small packets, the performance remains comparable to the original level.
> >
> > When using sockmap for forwarding, the average latency for different packet sizes
> > after sending 10,000 packets is as follows:
> > size    old(us)         new(us)
> > 512     56              55
> > 1472    58              58
> > 1600    106             79
> > 3000    145             108
> > 5000    182             123
> >
> > Signed-off-by: Feng Yang <yangfeng@kylinos.cn>
>
> Instead of changing everything, have you tried strategically adding
> MSG_MORE in this function ?

Untested patch:

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d6420b74ea9c6a9c53a7c16634cce82a1cd1bbd3..b0f5e8898fdf450129948d829240b570f3cbf9eb
100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3252,6 +3252,8 @@ static int __skb_send_sock(struct sock *sk,
struct sk_buff *skb, int offset,
                kv.iov_len = slen;
                memset(&msg, 0, sizeof(msg));
                msg.msg_flags = MSG_DONTWAIT | flags;
+               if (slen < len)
+                       msg.msg_flags |= MSG_MORE;

                iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen);
                ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
@@ -3292,6 +3294,8 @@ static int __skb_send_sock(struct sock *sk,
struct sk_buff *skb, int offset,
                                             flags,
                        };

+                       if (slen < len)
+                               msg.msg_flags |= MSG_MORE;
                        bvec_set_page(&bvec, skb_frag_page(frag), slen,
                                      skb_frag_off(frag) + offset);
                        iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1,

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] skbuff: Improve the sending efficiency of __skb_send_sock
  2025-06-27 10:19 ` Eric Dumazet
  2025-06-27 10:23   ` Eric Dumazet
@ 2025-06-29 11:33   ` David Laight
  2025-06-30  6:42     ` Feng Yang
  1 sibling, 1 reply; 5+ messages in thread
From: David Laight @ 2025-06-29 11:33 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Feng Yang, davem, kuba, pabeni, horms, willemb, almasrymina,
	kerneljasonxing, ebiggers, asml.silence, aleksander.lobakin,
	stfomichev, yangfeng, netdev, linux-kernel

On Fri, 27 Jun 2025 03:19:27 -0700
Eric Dumazet <edumazet@google.com> wrote:

> On Fri, Jun 27, 2025 at 2:44 AM Feng Yang <yangfeng59949@163.com> wrote:
> >
> > From: Feng Yang <yangfeng@kylinos.cn>
> >
> > By aggregating skb data into a bvec array for transmission, when using sockmap to forward large packets,
> > what previously required multiple transmissions now only needs a single transmission, which significantly enhances performance.
> > For small packets, the performance remains comparable to the original level.
> >
> > When using sockmap for forwarding, the average latency for different packet sizes
> > after sending 10,000 packets is as follows:
> > size    old(us)         new(us)
> > 512     56              55
> > 1472    58              58
> > 1600    106             79
> > 3000    145             108
> > 5000    182             123
> >
> > Signed-off-by: Feng Yang <yangfeng@kylinos.cn>  
> 
> Instead of changing everything, have you tried strategically adding
> MSG_MORE in this function ?
> 

Does (could) this code ever be used for protocols other than TCP?
For UDP setting MSG_MORE will generate a single datagram.
For SCTP all the data actually has to be sent as a single sendmsg()
in order to generate a single DATA chunk.

Prior to 6.5 the code used sock->ops->sendpage_locked() so had to do
a separate call per page.
But if all the overheads of 'iov_iter' are being added it does seem
appropriate to make use of its features!

	David
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] skbuff: Improve the sending efficiency of __skb_send_sock
  2025-06-29 11:33   ` David Laight
@ 2025-06-30  6:42     ` Feng Yang
  0 siblings, 0 replies; 5+ messages in thread
From: Feng Yang @ 2025-06-30  6:42 UTC (permalink / raw)
  To: david.laight.linux
  Cc: aleksander.lobakin, almasrymina, asml.silence, davem, ebiggers,
	edumazet, horms, kerneljasonxing, kuba, linux-kernel, netdev,
	pabeni, stfomichev, willemb, yangfeng59949, yangfeng

On Fri, 27 Jun 2025 03:23:24 -0700 Eric Dumazet <edumazet@google.com> wrote:

> On Fri, Jun 27, 2025 at 3:19 AM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Fri, Jun 27, 2025 at 2:44 AM Feng Yang <yangfeng59949@163.com> wrote:
> > >
> > > From: Feng Yang <yangfeng@kylinos.cn>
> > >
> > > By aggregating skb data into a bvec array for transmission, when using sockmap to forward large packets,
> > > what previously required multiple transmissions now only needs a single transmission, which significantly enhances performance.
> > > For small packets, the performance remains comparable to the original level.
> > >
> > > When using sockmap for forwarding, the average latency for different packet sizes
> > > after sending 10,000 packets is as follows:
> > > size    old(us)         new(us)
> > > 512     56              55
> > > 1472    58              58
> > > 1600    106             79
> > > 3000    145             108
> > > 5000    182             123
> > >
> > > Signed-off-by: Feng Yang <yangfeng@kylinos.cn>
> >
> > Instead of changing everything, have you tried strategically adding
> > MSG_MORE in this function ?
> 
> Untested patch:
> 
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index d6420b74ea9c6a9c53a7c16634cce82a1cd1bbd3..b0f5e8898fdf450129948d829240b570f3cbf9eb
> 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -3252,6 +3252,8 @@ static int __skb_send_sock(struct sock *sk,
> struct sk_buff *skb, int offset,
>                 kv.iov_len = slen;
>                 memset(&msg, 0, sizeof(msg));
>                 msg.msg_flags = MSG_DONTWAIT | flags;
> +               if (slen < len)
> +                       msg.msg_flags |= MSG_MORE;
> 
>                 iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen);
>                 ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
> @@ -3292,6 +3294,8 @@ static int __skb_send_sock(struct sock *sk,
> struct sk_buff *skb, int offset,
>                                              flags,
>                         };
> 
> +                       if (slen < len)
> +                               msg.msg_flags |= MSG_MORE;
>                         bvec_set_page(&bvec, skb_frag_page(frag), slen,
>                                       skb_frag_off(frag) + offset);
>                         iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1,

After testing, there is a performance improvement for large packets in both TCP and UDP.
Thanks.


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2025-06-30  6:42 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-27  9:44 [PATCH v2] skbuff: Improve the sending efficiency of __skb_send_sock Feng Yang
2025-06-27 10:19 ` Eric Dumazet
2025-06-27 10:23   ` Eric Dumazet
2025-06-29 11:33   ` David Laight
2025-06-30  6:42     ` Feng Yang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).