netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH 0/2] net: gro: avoid touching transport header
@ 2025-12-05 14:03 Paolo Abeni
  2025-12-05 14:03 ` [RFC PATCH 1/2] net: gro: avoid relaying on skb->transport_header at receive time Paolo Abeni
  2025-12-05 14:03 ` [RFC PATCH 2/2] net: gro: set the transport header later Paolo Abeni
  0 siblings, 2 replies; 7+ messages in thread
From: Paolo Abeni @ 2025-12-05 14:03 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, David Ahern

This series is basically a pre-req for GRO support for double UDP
encapsulation:

https://lore.kernel.org/netdev/cover.1764056123.git.pabeni@redhat.com/

that otherwise would require explicitly disabling gro on the outer
geneve device.

I *think* it should also help plain TCP GRO performance, even if I don't
have a very high speed, full zero-copy, big TCP testbed handy to
actually prove it - see patch 1 for the gory details.

Paolo Abeni (2):
  net: gro: avoid relaying on skb->transport_header at receive time
  net: gro: set the transport header later

 include/net/gro.h        | 26 ++++++++++++++++++++++++++
 include/net/tcp.h        |  3 ++-
 net/ipv4/af_inet.c       |  2 +-
 net/ipv4/tcp_offload.c   | 16 +++++++++-------
 net/ipv4/udp_offload.c   |  8 ++++++--
 net/ipv6/ip6_offload.c   |  3 +--
 net/ipv6/tcpv6_offload.c |  2 +-
 7 files changed, 46 insertions(+), 14 deletions(-)

-- 
2.52.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [RFC PATCH 1/2] net: gro: avoid relaying on skb->transport_header at receive time
  2025-12-05 14:03 [RFC PATCH 0/2] net: gro: avoid touching transport header Paolo Abeni
@ 2025-12-05 14:03 ` Paolo Abeni
  2025-12-05 14:37   ` Eric Dumazet
  2025-12-06 21:26   ` Willem de Bruijn
  2025-12-05 14:03 ` [RFC PATCH 2/2] net: gro: set the transport header later Paolo Abeni
  1 sibling, 2 replies; 7+ messages in thread
From: Paolo Abeni @ 2025-12-05 14:03 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, David Ahern

Currently {tcp,udp}_gro_receive rely on the gro network stage setting
the correct transport header offset for all the skbs held by the GRO
engine.

Such assumption is not necessary, as the code can instead leverage the
offset already available for the currently processed skb. Add a couple
of helpers for readability's sake.

As skb->transport_header lies on a different cacheline wrt skb->data,
this should save a cacheline access for each packet aggregation.
Additionally this will make the next patch possible.

Note that the compiler (gcc 15.2.1) does inline the tcp_gro_lookup()
call in tcp_gro_receive(), so the additional argument is only relevant
for the fraglist case.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/gro.h        | 26 ++++++++++++++++++++++++++
 include/net/tcp.h        |  3 ++-
 net/ipv4/tcp_offload.c   | 15 ++++++++-------
 net/ipv4/udp_offload.c   |  4 ++--
 net/ipv6/tcpv6_offload.c |  2 +-
 5 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/include/net/gro.h b/include/net/gro.h
index b65f631c521d..fdb9285ab117 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -420,6 +420,18 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 				struct udphdr *uh, struct sock *sk);
 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
 
+/* Return the skb hdr corresponding to the specified skb2 hdr.
+ * skb2 is held in the gro engine, i.e. its headers are in the linear part.
+ */
+static inline const void *
+skb_gro_header_from(const struct sk_buff *skb, const struct sk_buff *skb2,
+		    const void *hdr2)
+{
+	size_t offset = (unsigned char *)hdr2 - skb2->data;
+
+	return skb->data + offset;
+}
+
 static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
 {
 	struct udphdr *uh;
@@ -432,6 +444,13 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
 	return uh;
 }
 
+static inline const struct udphdr *
+udp_gro_udphdr_from(const struct sk_buff *skb, const struct sk_buff *skb2,
+		    const struct udphdr *uh)
+{
+	return (const struct udphdr *)skb_gro_header_from(skb, skb2, uh);
+}
+
 static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb,
 					    int proto)
 {
@@ -620,4 +639,11 @@ static inline struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb)
 	return th;
 }
 
+static inline const struct tcphdr *
+tcp_gro_header_from(const struct sk_buff *skb, const struct sk_buff *skb2,
+		    const struct tcphdr *th)
+{
+	return (const struct tcphdr *)skb_gro_header_from(skb, skb2, th);
+}
+
 #endif /* _NET_GRO_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0deb5e9dd911..a4c239daf2ea 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2313,7 +2313,8 @@ void tcp_v4_destroy_sock(struct sock *sk);
 
 struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 				netdev_features_t features);
-struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th);
+struct sk_buff *tcp_gro_lookup(struct list_head *head, struct sk_buff *skb,
+			       struct tcphdr *th);
 struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
 				struct tcphdr *th);
 INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index fdda18b1abda..fa36686df6d7 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -261,16 +261,17 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 	return segs;
 }
 
-struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th)
+struct sk_buff *tcp_gro_lookup(struct list_head *head, struct sk_buff *skb,
+			       struct tcphdr *th)
 {
-	struct tcphdr *th2;
+	const struct tcphdr *th2;
 	struct sk_buff *p;
 
 	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
-		th2 = tcp_hdr(p);
+		th2 = tcp_gro_header_from(p, skb, th);
 		if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
 			NAPI_GRO_CB(p)->same_flow = 0;
 			continue;
@@ -287,8 +288,8 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
 {
 	unsigned int thlen = th->doff * 4;
 	struct sk_buff *pp = NULL;
+	const struct tcphdr *th2;
 	struct sk_buff *p;
-	struct tcphdr *th2;
 	unsigned int len;
 	__be32 flags;
 	unsigned int mss = 1;
@@ -298,11 +299,11 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
 	len = skb_gro_len(skb);
 	flags = tcp_flag_word(th);
 
-	p = tcp_gro_lookup(head, th);
+	p = tcp_gro_lookup(head, skb, th);
 	if (!p)
 		goto out_check_final;
 
-	th2 = tcp_hdr(p);
+	th2 = tcp_gro_header_from(p, skb, th);
 	flush = (__force int)(flags & TCP_FLAG_CWR);
 	flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
 		  ~(TCP_FLAG_FIN | TCP_FLAG_PSH));
@@ -398,7 +399,7 @@ static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
 	if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST)))
 		return;
 
-	p = tcp_gro_lookup(head, th);
+	p = tcp_gro_lookup(head, skb, th);
 	if (p) {
 		NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
 		return;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 19d0b5b09ffa..7048cb2a28a2 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -701,7 +701,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 	struct sk_buff *pp = NULL;
-	struct udphdr *uh2;
+	const struct udphdr *uh2;
 	struct sk_buff *p;
 	unsigned int ulen;
 	int ret = 0;
@@ -726,7 +726,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
-		uh2 = udp_hdr(p);
+		uh2 = udp_gro_udphdr_from(p, skb, uh);
 
 		/* Match ports only, as csum is always non zero */
 		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index effeba58630b..ae481bf95651 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -27,7 +27,7 @@ static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
 	if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST)))
 		return;
 
-	p = tcp_gro_lookup(head, th);
+	p = tcp_gro_lookup(head, skb, th);
 	if (p) {
 		NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
 		return;
-- 
2.52.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [RFC PATCH 2/2] net: gro: set the transport header later
  2025-12-05 14:03 [RFC PATCH 0/2] net: gro: avoid touching transport header Paolo Abeni
  2025-12-05 14:03 ` [RFC PATCH 1/2] net: gro: avoid relaying on skb->transport_header at receive time Paolo Abeni
@ 2025-12-05 14:03 ` Paolo Abeni
  1 sibling, 0 replies; 7+ messages in thread
From: Paolo Abeni @ 2025-12-05 14:03 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, David Ahern

After the previous patch, the GRO engine receive callbacks no longer
rely on the skb transport header being set.

Move such operation to GRO complete time, with one notable exception:
SKB_GSO_FRAGLIST offload needs the headers to be set on each skb in
the list prior to segmentation.

This prevents the NAPI gro_cell instance on top of a geneve tunnel
with GRO hints enabled from corrupting the GRO-hint-aggregated packet
setting the (innermost) transport header to the middle-one before
stopping the GRO process due to the encap mark.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/ipv4/af_inet.c     | 2 +-
 net/ipv4/tcp_offload.c | 1 +
 net/ipv4/udp_offload.c | 4 ++++
 net/ipv6/ip6_offload.c | 3 +--
 4 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 08d811f11896..f954ab78481a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1527,7 +1527,6 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
 	 * as we already checked checksum over ipv4 header was 0
 	 */
 	skb_gro_pull(skb, sizeof(*iph));
-	skb_set_transport_header(skb, skb_gro_offset(skb));
 
 	pp = indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive,
 				       ops->callbacks.gro_receive, head, skb);
@@ -1611,6 +1610,7 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
 	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
 		goto out;
 
+	skb_set_transport_header(skb, nhoff + sizeof(*iph));
 	/* Only need to add sizeof(*iph) to get to the next hdr below
 	 * because any hdr with option will have been flushed in
 	 * inet_gro_receive().
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index fa36686df6d7..a78d9b15de06 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -334,6 +334,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
 		flush |= skb->csum_level != p->csum_level;
 		flush |= NAPI_GRO_CB(p)->count >= 64;
 		skb_set_network_header(skb, skb_gro_receive_network_offset(skb));
+		skb_set_transport_header(skb, (unsigned char *)th - skb->data);
 
 		if (flush || skb_gro_receive_list(p, skb))
 			mss = 1;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 7048cb2a28a2..73edbc154cfa 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -751,6 +751,8 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 			pp = p;
 		} else {
 			if (NAPI_GRO_CB(skb)->is_flist) {
+				int offset;
+
 				if (!pskb_may_pull(skb, skb_gro_offset(skb))) {
 					NAPI_GRO_CB(skb)->flush = 1;
 					return NULL;
@@ -761,6 +763,8 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 					return NULL;
 				}
 				skb_set_network_header(skb, skb_gro_receive_network_offset(skb));
+				offset = (unsigned char *)uh - skb->data;
+				skb_set_transport_header(skb, offset);
 				ret = skb_gro_receive_list(p, skb);
 			} else {
 				skb_gro_postpull_rcsum(skb, uh,
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index fce91183797a..ed71cbd45690 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -256,8 +256,6 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
 		skb_gro_pull(skb, sizeof(*iph));
 	}
 
-	skb_set_transport_header(skb, skb_gro_offset(skb));
-
 	NAPI_GRO_CB(skb)->proto = proto;
 
 	flush--;
@@ -382,6 +380,7 @@ INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
 		goto out;
 
+	skb_set_transport_header(skb, nhoff);
 	err = INDIRECT_CALL_L4(ops->callbacks.gro_complete, tcp6_gro_complete,
 			       udp6_gro_complete, skb, nhoff);
 
-- 
2.52.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH 1/2] net: gro: avoid relaying on skb->transport_header at receive time
  2025-12-05 14:03 ` [RFC PATCH 1/2] net: gro: avoid relaying on skb->transport_header at receive time Paolo Abeni
@ 2025-12-05 14:37   ` Eric Dumazet
  2025-12-05 15:22     ` Paolo Abeni
  2025-12-06 21:26   ` Willem de Bruijn
  1 sibling, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2025-12-05 14:37 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: netdev, David S. Miller, Jakub Kicinski, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, David Ahern

On Fri, Dec 5, 2025 at 6:04 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
> Currently {tcp,udp}_gro_receive relay on the gro network stage setting

rely :)

> the correct transport header offset for all the skbs held by the GRO
> engine.
>
> Such assumption is not necessary, as the code can instead leverage the
> offset already available for the currently processed skb. Add a couple
> of helpers to for readabilty' sake.
>
> As skb->transport_header lays on a different cacheline wrt skb->data,
> this should save a cacheline access for each packet aggregation.
> Additionally this will make the next patch possible.
>
> Note that the compiler (gcc 15.2.1) does inline the tcp_gro_lookup()
> call in tcp_gro_receive(), so the additional argument is only relevant
> for the fraglist case.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
>  include/net/gro.h        | 26 ++++++++++++++++++++++++++
>  include/net/tcp.h        |  3 ++-
>  net/ipv4/tcp_offload.c   | 15 ++++++++-------
>  net/ipv4/udp_offload.c   |  4 ++--
>  net/ipv6/tcpv6_offload.c |  2 +-
>  5 files changed, 39 insertions(+), 11 deletions(-)
>
> diff --git a/include/net/gro.h b/include/net/gro.h
> index b65f631c521d..fdb9285ab117 100644
> --- a/include/net/gro.h
> +++ b/include/net/gro.h
> @@ -420,6 +420,18 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
>                                 struct udphdr *uh, struct sock *sk);
>  int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
>
> +/* Return the skb hdr corresponding to the specified skb2 hdr.
> + * skb2 is held in the gro engine, i.e. its headers are in the linear part.
> + */
> +static inline const void *
> +skb_gro_header_from(const struct sk_buff *skb, const struct sk_buff *skb2,
> +                   const void *hdr2)
> +{
> +       size_t offset = (unsigned char *)hdr2 - skb2->data;
> +
> +       return skb->data + offset;
> +}

I would rather switch gro to pass an @offset instead of a header pointer ?

Rebuilding one header pointer from offset is fast : skb->data + offset
( offset : network header, transport header, ...)

As a matter of fact, some GRO state variables could be onstack, instead
of being stored in NAPI_GRO_CB()

This would avoid some stalls because skb->cb[] has been cleared with
memset() with long words,
while GRO is using smaller fields.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH 1/2] net: gro: avoid relaying on skb->transport_header at receive time
  2025-12-05 14:37   ` Eric Dumazet
@ 2025-12-05 15:22     ` Paolo Abeni
  2025-12-05 15:36       ` Eric Dumazet
  0 siblings, 1 reply; 7+ messages in thread
From: Paolo Abeni @ 2025-12-05 15:22 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, David S. Miller, Jakub Kicinski, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, David Ahern

On 12/5/25 3:37 PM, Eric Dumazet wrote:
> On Fri, Dec 5, 2025 at 6:04 AM Paolo Abeni <pabeni@redhat.com> wrote:
>>
>> Currently {tcp,udp}_gro_receive relay on the gro network stage setting
> 
> rely :)
> 
>> the correct transport header offset for all the skbs held by the GRO
>> engine.
>>
>> Such assumption is not necessary, as the code can instead leverage the
>> offset already available for the currently processed skb. Add a couple
>> of helpers to for readabilty' sake.
>>
>> As skb->transport_header lays on a different cacheline wrt skb->data,
>> this should save a cacheline access for each packet aggregation.
>> Additionally this will make the next patch possible.
>>
>> Note that the compiler (gcc 15.2.1) does inline the tcp_gro_lookup()
>> call in tcp_gro_receive(), so the additional argument is only relevant
>> for the fraglist case.
>>
>> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
>> ---
>>  include/net/gro.h        | 26 ++++++++++++++++++++++++++
>>  include/net/tcp.h        |  3 ++-
>>  net/ipv4/tcp_offload.c   | 15 ++++++++-------
>>  net/ipv4/udp_offload.c   |  4 ++--
>>  net/ipv6/tcpv6_offload.c |  2 +-
>>  5 files changed, 39 insertions(+), 11 deletions(-)
>>
>> diff --git a/include/net/gro.h b/include/net/gro.h
>> index b65f631c521d..fdb9285ab117 100644
>> --- a/include/net/gro.h
>> +++ b/include/net/gro.h
>> @@ -420,6 +420,18 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
>>                                 struct udphdr *uh, struct sock *sk);
>>  int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
>>
>> +/* Return the skb hdr corresponding to the specified skb2 hdr.
>> + * skb2 is held in the gro engine, i.e. its headers are in the linear part.
>> + */
>> +static inline const void *
>> +skb_gro_header_from(const struct sk_buff *skb, const struct sk_buff *skb2,
>> +                   const void *hdr2)
>> +{
>> +       size_t offset = (unsigned char *)hdr2 - skb2->data;
>> +
>> +       return skb->data + offset;
>> +}
> 
> I would rather switch gro to pass an @offset instead of a header pointer ?
> 
> Rebuilding one header pointer from offset is fast : skb->data + offset
> ( offset : network header, transport header, ...)

I considered such option and opted for the above for a very small
reason: it produces a little more compact (C) code in the caller.

I'll switch to offset in next revisions.
> As a matter of fact, some GRO state variables could be onstack, instead
> of being stored in NAPI_GRO_CB()
Do you mean the network offsets? In any case, I hope we can keep such
work separate from this one?
> This would avoid some stalls because skb->cb[] has been cleared with
> memset() with long words, while GRO is using smaller fields.

Whoops, I never considered store forwarding induced stalls. Something to
ponder about for me.

Many thanks!

Paolo


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH 1/2] net: gro: avoid relaying on skb->transport_header at receive time
  2025-12-05 15:22     ` Paolo Abeni
@ 2025-12-05 15:36       ` Eric Dumazet
  0 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2025-12-05 15:36 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: netdev, David S. Miller, Jakub Kicinski, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, David Ahern

On Fri, Dec 5, 2025 at 7:23 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
> On 12/5/25 3:37 PM, Eric Dumazet wrote:
> > On Fri, Dec 5, 2025 at 6:04 AM Paolo Abeni <pabeni@redhat.com> wrote:
> >>
> >> Currently {tcp,udp}_gro_receive relay on the gro network stage setting
> >
> > rely :)
> >
> >> the correct transport header offset for all the skbs held by the GRO
> >> engine.
> >>
> >> Such assumption is not necessary, as the code can instead leverage the
> >> offset already available for the currently processed skb. Add a couple
> >> of helpers to for readabilty' sake.
> >>
> >> As skb->transport_header lays on a different cacheline wrt skb->data,
> >> this should save a cacheline access for each packet aggregation.
> >> Additionally this will make the next patch possible.
> >>
> >> Note that the compiler (gcc 15.2.1) does inline the tcp_gro_lookup()
> >> call in tcp_gro_receive(), so the additional argument is only relevant
> >> for the fraglist case.
> >>
> >> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> >> ---
> >>  include/net/gro.h        | 26 ++++++++++++++++++++++++++
> >>  include/net/tcp.h        |  3 ++-
> >>  net/ipv4/tcp_offload.c   | 15 ++++++++-------
> >>  net/ipv4/udp_offload.c   |  4 ++--
> >>  net/ipv6/tcpv6_offload.c |  2 +-
> >>  5 files changed, 39 insertions(+), 11 deletions(-)
> >>
> >> diff --git a/include/net/gro.h b/include/net/gro.h
> >> index b65f631c521d..fdb9285ab117 100644
> >> --- a/include/net/gro.h
> >> +++ b/include/net/gro.h
> >> @@ -420,6 +420,18 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
> >>                                 struct udphdr *uh, struct sock *sk);
> >>  int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
> >>
> >> +/* Return the skb hdr corresponding to the specified skb2 hdr.
> >> + * skb2 is held in the gro engine, i.e. its headers are in the linear part.
> >> + */
> >> +static inline const void *
> >> +skb_gro_header_from(const struct sk_buff *skb, const struct sk_buff *skb2,
> >> +                   const void *hdr2)
> >> +{
> >> +       size_t offset = (unsigned char *)hdr2 - skb2->data;
> >> +
> >> +       return skb->data + offset;
> >> +}
> >
> > I would rather switch gro to pass an @offset instead of a header pointer ?
> >
> > Rebuilding one header pointer from offset is fast : skb->data + offset
> > ( offset : network header, transport header, ...)
>
> I considered such option and opted for the above for a very small
> reason: it produces a little more compact (C) code in the caller.
>
> I'll switch to offset in next revisions.

I am

> > As a matter of fact, some GRO state variables could be onstack, instead
> > of being stored in NAPI_GRO_CB()
> Do you mean the network offsets? In any case, I hope we can keep such
> work separate from this one?

Sure, just some general observation.

BTW the offending memset() can be optimized a bit to not let the
compiler call an external function.
I do not know how to upstream this properly ;)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a00808f7be6a..7df63dc79cf3 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -424,7 +424,13 @@ struct sk_buff *slab_build_skb(void *data)
        if (unlikely(!skb))
                return NULL;

-       memset(skb, 0, offsetof(struct sk_buff, tail));
+       /* Implement memset(skb, 0, offsetof(struct sk_buff, tail)
+        * so that compiler inlines it ;)
+        */
+       memset(skb, 0, 128);
+       barrier();
+       memset((void *)skb + 128, 0, offsetof(struct sk_buff, tail) - 128);
+
        data = __slab_build_skb(data, &size);
        __finalize_skb_around(skb, data, size);

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH 1/2] net: gro: avoid relaying on skb->transport_header at receive time
  2025-12-05 14:03 ` [RFC PATCH 1/2] net: gro: avoid relaying on skb->transport_header at receive time Paolo Abeni
  2025-12-05 14:37   ` Eric Dumazet
@ 2025-12-06 21:26   ` Willem de Bruijn
  1 sibling, 0 replies; 7+ messages in thread
From: Willem de Bruijn @ 2025-12-06 21:26 UTC (permalink / raw)
  To: Paolo Abeni, netdev
  Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, David Ahern

Paolo Abeni wrote:
> Currently {tcp,udp}_gro_receive relay on the gro network stage setting
> the correct transport header offset for all the skbs held by the GRO
> engine.
> 
> Such assumption is not necessary, as the code can instead leverage the
> offset already available for the currently processed skb. Add a couple
> of helpers to for readabilty' sake.
> 
> As skb->transport_header lays on a different cacheline wrt skb->data,
> this should save a cacheline access for each packet aggregation.
> Additionally this will make the next patch possible.
> 
> Note that the compiler (gcc 15.2.1) does inline the tcp_gro_lookup()
> call in tcp_gro_receive(), so the additional argument is only relevant
> for the fraglist case.
> 
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
>  include/net/gro.h        | 26 ++++++++++++++++++++++++++
>  include/net/tcp.h        |  3 ++-
>  net/ipv4/tcp_offload.c   | 15 ++++++++-------
>  net/ipv4/udp_offload.c   |  4 ++--
>  net/ipv6/tcpv6_offload.c |  2 +-
>  5 files changed, 39 insertions(+), 11 deletions(-)
> 
> diff --git a/include/net/gro.h b/include/net/gro.h
> index b65f631c521d..fdb9285ab117 100644
> --- a/include/net/gro.h
> +++ b/include/net/gro.h
> @@ -420,6 +420,18 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
>  				struct udphdr *uh, struct sock *sk);
>  int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
>  
> +/* Return the skb hdr corresponding to the specified skb2 hdr.
> + * skb2 is held in the gro engine, i.e. its headers are in the linear part.

I thought "being held in the gro engine" intended to mean being held
on the gro_list, i.e., p.

But this is used inverse, where skb2 is the currently arriving packet
and skb == p. Is this intentional and am I just misunderstanding the
intent of this comment? Or is the comment intended to say

"skb is held on the gro list, therefore [..]"

> + */
> +static inline const void *
> +skb_gro_header_from(const struct sk_buff *skb, const struct sk_buff *skb2,
> +		    const void *hdr2)
> +{
> +	size_t offset = (unsigned char *)hdr2 - skb2->data;
> +
> +	return skb->data + offset;
> +}

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-12-06 21:26 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-12-05 14:03 [RFC PATCH 0/2] net: gro: avoid touching transport header Paolo Abeni
2025-12-05 14:03 ` [RFC PATCH 1/2] net: gro: avoid relaying on skb->transport_header at receive time Paolo Abeni
2025-12-05 14:37   ` Eric Dumazet
2025-12-05 15:22     ` Paolo Abeni
2025-12-05 15:36       ` Eric Dumazet
2025-12-06 21:26   ` Willem de Bruijn
2025-12-05 14:03 ` [RFC PATCH 2/2] net: gro: set the transport header later Paolo Abeni

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).