* [PATCH net-next] tcp: drop tcp_v{4,6}_restore_cb
@ 2025-04-10 16:16 Stanislav Fomichev
2025-04-10 16:24 ` Eric Dumazet
0 siblings, 1 reply; 4+ messages in thread
From: Stanislav Fomichev @ 2025-04-10 16:16 UTC (permalink / raw)
To: netdev
Cc: davem, edumazet, kuba, pabeni, ncardwell, kuniyu, horms, dsahern,
linux-kernel
Instead of moving and restoring IP[6]CB, reorder tcp_skb_cb
to alias with inet[6]_skb_parm. Add static asserts to make
sure tcp_skb_cb fits into skb.cb and that inet[6]_skb_parm is
at the proper offset.
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
---
include/net/tcp.h | 46 +++++++++++++++++++++++++--------------------
net/ipv4/tcp_ipv4.c | 16 ----------------
net/ipv6/tcp_ipv6.c | 25 ------------------------
3 files changed, 26 insertions(+), 61 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4450c384ef17..e80fd505f139 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1010,6 +1010,27 @@ enum tcp_skb_cb_sacked_flags {
* If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
*/
struct tcp_skb_cb {
+ union {
+ struct {
+#define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1)
+ /* There is space for up to 24 bytes */
+ __u32 is_app_limited:1, /* cwnd not fully used? */
+ delivered_ce:20,
+ unused:11;
+ /* pkts S/ACKed so far upon tx of skb, incl retrans: */
+ __u32 delivered;
+ /* start of send pipeline phase */
+ u64 first_tx_mstamp;
+ /* when we reached the "delivered" count */
+ u64 delivered_mstamp;
+ } tx; /* only used for outgoing skbs */
+ union {
+ struct inet_skb_parm h4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_skb_parm h6;
+#endif
+ } header; /* For incoming skbs */
+ };
__u32 seq; /* Starting sequence number */
__u32 end_seq; /* SEQ + FIN + SYN + datalen */
union {
@@ -1033,28 +1054,13 @@ struct tcp_skb_cb {
has_rxtstamp:1, /* SKB has a RX timestamp */
unused:4;
__u32 ack_seq; /* Sequence number ACK'd */
- union {
- struct {
-#define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1)
- /* There is space for up to 24 bytes */
- __u32 is_app_limited:1, /* cwnd not fully used? */
- delivered_ce:20,
- unused:11;
- /* pkts S/ACKed so far upon tx of skb, incl retrans: */
- __u32 delivered;
- /* start of send pipeline phase */
- u64 first_tx_mstamp;
- /* when we reached the "delivered" count */
- u64 delivered_mstamp;
- } tx; /* only used for outgoing skbs */
- union {
- struct inet_skb_parm h4;
+};
+
+static_assert(sizeof(struct tcp_skb_cb) <= sizeof_field(struct sk_buff, cb));
+static_assert(offsetof(struct tcp_skb_cb, header.h4) == 0);
#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_skb_parm h6;
+static_assert(offsetof(struct tcp_skb_cb, header.h6) == 0);
#endif
- } header; /* For incoming skbs */
- };
-};
#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8cce0d5489da..9654f663fd0d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2153,22 +2153,9 @@ int tcp_filter(struct sock *sk, struct sk_buff *skb)
}
EXPORT_IPV6_MOD(tcp_filter);
-static void tcp_v4_restore_cb(struct sk_buff *skb)
-{
- memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
- sizeof(struct inet_skb_parm));
-}
-
static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
const struct tcphdr *th)
{
- /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
- * barrier() makes sure compiler wont play fool^Waliasing games.
- */
- memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
- sizeof(struct inet_skb_parm));
- barrier();
-
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff * 4);
@@ -2293,7 +2280,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
* Try to feed this packet to this socket
* instead of discarding it.
*/
- tcp_v4_restore_cb(skb);
sock_put(sk);
goto lookup;
}
@@ -2302,7 +2288,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
nf_reset_ct(skb);
if (nsk == sk) {
reqsk_put(req);
- tcp_v4_restore_cb(skb);
} else {
drop_reason = tcp_child_process(sk, nsk, skb);
if (drop_reason) {
@@ -2430,7 +2415,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
if (sk2) {
inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
- tcp_v4_restore_cb(skb);
refcounted = false;
__this_cpu_write(tcp_tw_isn, isn);
goto process;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b03c223eda4f..f7734ba7f3e6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1342,16 +1342,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
return 0; /* don't send reset */
}
-static void tcp_v6_restore_cb(struct sk_buff *skb)
-{
- /* We need to move header back to the beginning if xfrm6_policy_check()
- * and tcp_v6_fill_cb() are going to be called again.
- * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
- */
- memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
- sizeof(struct inet6_skb_parm));
-}
-
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst,
@@ -1552,8 +1542,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
consume_skb(ireq->pktopts);
ireq->pktopts = NULL;
- if (newnp->pktoptions)
- tcp_v6_restore_cb(newnp->pktoptions);
}
} else {
if (!req_unhash && found_dup_sk) {
@@ -1710,7 +1698,6 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (inet6_test_bit(REPFLOW, sk))
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
- tcp_v6_restore_cb(opt_skb);
opt_skb = xchg(&np->pktoptions, opt_skb);
} else {
__kfree_skb(opt_skb);
@@ -1725,15 +1712,6 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
const struct tcphdr *th)
{
- /* This is tricky: we move IP6CB at its correct location into
- * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
- * _decode_session6() uses IP6CB().
- * barrier() makes sure compiler won't play aliasing games.
- */
- memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
- sizeof(struct inet6_skb_parm));
- barrier();
-
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff*4);
@@ -1849,7 +1827,6 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
* Try to feed this packet to this socket
* instead of discarding it.
*/
- tcp_v6_restore_cb(skb);
sock_put(sk);
goto lookup;
}
@@ -1858,7 +1835,6 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
nf_reset_ct(skb);
if (nsk == sk) {
reqsk_put(req);
- tcp_v6_restore_cb(skb);
} else {
drop_reason = tcp_child_process(sk, nsk, skb);
if (drop_reason) {
@@ -1987,7 +1963,6 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
struct inet_timewait_sock *tw = inet_twsk(sk);
inet_twsk_deschedule_put(tw);
sk = sk2;
- tcp_v6_restore_cb(skb);
refcounted = false;
__this_cpu_write(tcp_tw_isn, isn);
goto process;
--
2.49.0
^ permalink raw reply related [flat|nested] 4+ messages in thread* Re: [PATCH net-next] tcp: drop tcp_v{4,6}_restore_cb
2025-04-10 16:16 [PATCH net-next] tcp: drop tcp_v{4,6}_restore_cb Stanislav Fomichev
@ 2025-04-10 16:24 ` Eric Dumazet
2025-04-10 16:45 ` Stanislav Fomichev
0 siblings, 1 reply; 4+ messages in thread
From: Eric Dumazet @ 2025-04-10 16:24 UTC (permalink / raw)
To: Stanislav Fomichev
Cc: netdev, davem, kuba, pabeni, ncardwell, kuniyu, horms, dsahern,
linux-kernel
On Thu, Apr 10, 2025 at 6:16 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
>
> Instead of moving and restoring IP[6]CB, reorder tcp_skb_cb
> to alias with inet[6]_skb_parm. Add static asserts to make
> sure tcp_skb_cb fits into skb.cb and that inet[6]_skb_parm is
> at the proper offset.
May I ask : why ?
I think you are simply reverting 971f10eca18 ("tcp: better TCP_SKB_CB
layout to reduce cache line misses")
without any performance measurements.
^ permalink raw reply [flat|nested] 4+ messages in thread* Re: [PATCH net-next] tcp: drop tcp_v{4,6}_restore_cb
2025-04-10 16:24 ` Eric Dumazet
@ 2025-04-10 16:45 ` Stanislav Fomichev
2025-04-10 16:58 ` Eric Dumazet
0 siblings, 1 reply; 4+ messages in thread
From: Stanislav Fomichev @ 2025-04-10 16:45 UTC (permalink / raw)
To: Eric Dumazet
Cc: Stanislav Fomichev, netdev, davem, kuba, pabeni, ncardwell,
kuniyu, horms, dsahern, linux-kernel
On 04/10, Eric Dumazet wrote:
> On Thu, Apr 10, 2025 at 6:16 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> >
> > Instead of moving and restoring IP[6]CB, reorder tcp_skb_cb
> > to alias with inet[6]_skb_parm. Add static asserts to make
> > sure tcp_skb_cb fits into skb.cb and that inet[6]_skb_parm is
> > at the proper offset.
>
> May I ask : why ?
>
> I think you are simply reverting 971f10eca18 ("tcp: better TCP_SKB_CB
> layout to reduce cache line misses")
> without any performance measurements.
Oh, wow, I did not go that far back into the history, thanks for the
pointer! Let me see if there is any perf impact form this...
^ permalink raw reply [flat|nested] 4+ messages in thread* Re: [PATCH net-next] tcp: drop tcp_v{4,6}_restore_cb
2025-04-10 16:45 ` Stanislav Fomichev
@ 2025-04-10 16:58 ` Eric Dumazet
0 siblings, 0 replies; 4+ messages in thread
From: Eric Dumazet @ 2025-04-10 16:58 UTC (permalink / raw)
To: Stanislav Fomichev
Cc: Stanislav Fomichev, netdev, davem, kuba, pabeni, ncardwell,
kuniyu, horms, dsahern, linux-kernel
On Thu, Apr 10, 2025 at 6:45 PM Stanislav Fomichev <stfomichev@gmail.com> wrote:
>
> On 04/10, Eric Dumazet wrote:
> > On Thu, Apr 10, 2025 at 6:16 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > >
> > > Instead of moving and restoring IP[6]CB, reorder tcp_skb_cb
> > > to alias with inet[6]_skb_parm. Add static asserts to make
> > > sure tcp_skb_cb fits into skb.cb and that inet[6]_skb_parm is
> > > at the proper offset.
> >
> > May I ask : why ?
> >
> > I think you are simply reverting 971f10eca18 ("tcp: better TCP_SKB_CB
> > layout to reduce cache line misses")
> > without any performance measurements.
>
> Oh, wow, I did not go that far back into the history, thanks for the
> pointer! Let me see if there is any perf impact form this...
To be fair, we now have RB-tree for the out of order queue, we no
longer of O(N) costs
when trying to insert an skb in this queue. Also we try to coalesce
skbs together.
Tests would require thousands of skbs in the out-of-order queue.
Think of long-distance flows (rtt > 100ms), and big tcp_rmem[] and
tcp_wmem[] limits for the sender/receiver.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-04-10 16:58 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-04-10 16:16 [PATCH net-next] tcp: drop tcp_v{4,6}_restore_cb Stanislav Fomichev
2025-04-10 16:24 ` Eric Dumazet
2025-04-10 16:45 ` Stanislav Fomichev
2025-04-10 16:58 ` Eric Dumazet
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).