* [PATCH v3 net-next 0/3] tcp: better TCP_SKB_CB layout
@ 2014-09-25 20:14 Eric Dumazet
2014-09-25 20:14 ` [PATCH v3 net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo() Eric Dumazet
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Eric Dumazet @ 2014-09-25 20:14 UTC (permalink / raw)
To: David S. Miller
Cc: netdev, Yuchung Cheng, Neal Cardwell, Christoph Paasch,
Eric Dumazet
TCP had the assumption that IPCB and IP6CB are first members of skb->cb[]
This is fine, except that IPCB/IP6CB are used in TCP for a very short time
in input path.
What really matters for TCP stack is to get skb->next,
TCP_SKB_CB(skb)->seq, and TCP_SKB_CB(skb)->end_seq in the same cache line.
skb that are immediately consumed do not care because whole skb->cb[] is
hot in cpu cache, while skb that sit in wocket write queue or receive queues
do not need TCP_SKB_CB(skb)->header at all.
This patch set implements the prereq for IPv4, IPv6, and TCP to make this
possible. This makes TCP more efficient.
Eric Dumazet (3):
ipv4: rename ip_options_echo to __ip_options_echo()
ipv6: add a struct inet6_skb_parm param to ipv6_opt_accepted()
tcp: better TCP_SKB_CB layout to reduce cache line misses
include/net/ip.h | 15 ++++++++++++---
include/net/ipv6.h | 3 ++-
include/net/tcp.h | 12 ++++++------
net/dccp/ipv6.c | 2 +-
net/ipv4/ip_options.c | 6 ++----
net/ipv4/ip_output.c | 8 +++++---
net/ipv4/tcp_ipv4.c | 29 ++++++++++++++++++-----------
net/ipv4/tcp_output.c | 5 +++++
net/ipv6/af_inet6.c | 4 ++--
net/ipv6/syncookies.c | 2 +-
net/ipv6/tcp_ipv6.c | 12 ++++++++++--
11 files changed, 64 insertions(+), 34 deletions(-)
--
2.1.0.rc2.206.gedb03e5
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3 net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo()
2014-09-25 20:14 [PATCH v3 net-next 0/3] tcp: better TCP_SKB_CB layout Eric Dumazet
@ 2014-09-25 20:14 ` Eric Dumazet
2014-09-25 20:30 ` Joe Perches
2014-09-25 20:14 ` [PATCH v3 net-next 2/3] ipv6: add a struct inet6_skb_parm param to ipv6_opt_accepted() Eric Dumazet
2014-09-25 20:14 ` [PATCH v3 net-next 3/3] tcp: better TCP_SKB_CB layout to reduce cache line misses Eric Dumazet
2 siblings, 1 reply; 11+ messages in thread
From: Eric Dumazet @ 2014-09-25 20:14 UTC (permalink / raw)
To: David S. Miller
Cc: netdev, Yuchung Cheng, Neal Cardwell, Christoph Paasch,
Eric Dumazet
ip_options_echo() assumes struct ip_options is provided in &IPCB(skb)->opt
Lets break this assumption, but provide a helper to not change all call points.
ip_send_unicast_reply() gets a new struct ip_options pointer.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/ip.h | 15 ++++++++++++---
net/ipv4/ip_options.c | 6 ++----
net/ipv4/ip_output.c | 8 +++++---
net/ipv4/tcp_ipv4.c | 10 ++++++----
4 files changed, 25 insertions(+), 14 deletions(-)
diff --git a/include/net/ip.h b/include/net/ip.h
index fcd9068fb8c3..0bb620702929 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -180,8 +180,10 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg)
return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0;
}
-void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
- __be32 saddr, const struct ip_reply_arg *arg,
+void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
+ const struct ip_options *sopt,
+ __be32 daddr, __be32 saddr,
+ const struct ip_reply_arg *arg,
unsigned int len);
#define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field)
@@ -511,7 +513,14 @@ int ip_forward(struct sk_buff *skb);
void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
__be32 daddr, struct rtable *rt, int is_frag);
-int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb);
+
+int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
+ const struct ip_options *sopt);
+static inline int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
+{
+ return __ip_options_echo(dopt, skb, &IPCB(skb)->opt);
+}
+
void ip_options_fragment(struct sk_buff *skb);
int ip_options_compile(struct net *net, struct ip_options *opt,
struct sk_buff *skb);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ad382499bace..5b3d91be2db0 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -87,17 +87,15 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
* NOTE: dopt cannot point to skb.
*/
-int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
+int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
+ const struct ip_options *sopt)
{
- const struct ip_options *sopt;
unsigned char *sptr, *dptr;
int soffset, doffset;
int optlen;
memset(dopt, 0, sizeof(struct ip_options));
- sopt = &(IPCB(skb)->opt);
-
if (sopt->optlen == 0)
return 0;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 215af2b155cb..c8fa62476461 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1522,8 +1522,10 @@ static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = {
.uc_ttl = -1,
};
-void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
- __be32 saddr, const struct ip_reply_arg *arg,
+void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
+ const struct ip_options *sopt,
+ __be32 daddr, __be32 saddr,
+ const struct ip_reply_arg *arg,
unsigned int len)
{
struct ip_options_data replyopts;
@@ -1534,7 +1536,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
struct sock *sk;
struct inet_sock *inet;
- if (ip_options_echo(&replyopts.opt.opt, skb))
+ if (__ip_options_echo(&replyopts.opt.opt, skb, sopt))
return;
ipc.addr = daddr;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3b2e49cb2b61..28ab90382c01 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -681,8 +681,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
net = dev_net(skb_dst(skb)->dev);
arg.tos = ip_hdr(skb)->tos;
- ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
+ ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
+ ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+ &arg, arg.iov[0].iov_len);
TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
@@ -764,8 +765,9 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
if (oif)
arg.bound_dev_if = oif;
arg.tos = tos;
- ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
+ ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
+ ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+ &arg, arg.iov[0].iov_len);
TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}
--
2.1.0.rc2.206.gedb03e5
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 net-next 2/3] ipv6: add a struct inet6_skb_parm param to ipv6_opt_accepted()
2014-09-25 20:14 [PATCH v3 net-next 0/3] tcp: better TCP_SKB_CB layout Eric Dumazet
2014-09-25 20:14 ` [PATCH v3 net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo() Eric Dumazet
@ 2014-09-25 20:14 ` Eric Dumazet
2014-09-25 20:14 ` [PATCH v3 net-next 3/3] tcp: better TCP_SKB_CB layout to reduce cache line misses Eric Dumazet
2 siblings, 0 replies; 11+ messages in thread
From: Eric Dumazet @ 2014-09-25 20:14 UTC (permalink / raw)
To: David S. Miller
Cc: netdev, Yuchung Cheng, Neal Cardwell, Christoph Paasch,
Eric Dumazet
ipv6_opt_accepted() assumes IP6CB(skb) holds the struct inet6_skb_parm
that it needs. Lets not assume this, as TCP stack might use a different
place.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/ipv6.h | 3 ++-
net/dccp/ipv6.c | 2 +-
net/ipv6/af_inet6.c | 4 ++--
net/ipv6/syncookies.c | 2 +-
net/ipv6/tcp_ipv6.c | 5 +++--
5 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7e247e9b8765..97f472012438 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -288,7 +288,8 @@ struct ipv6_txoptions *ipv6_renew_options(struct sock *sk,
struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
struct ipv6_txoptions *opt);
-bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb);
+bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
+ const struct inet6_skb_parm *opt);
static inline bool ipv6_accept_ra(struct inet6_dev *idev)
{
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 04cb17d4b0ce..ad2acfe1ca61 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -404,7 +404,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
- if (ipv6_opt_accepted(sk, skb) ||
+ if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
atomic_inc(&skb->users);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e4865a3ebe1d..34f726f59814 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -672,10 +672,10 @@ int inet6_sk_rebuild_header(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
-bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb)
+bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
+ const struct inet6_skb_parm *opt)
{
const struct ipv6_pinfo *np = inet6_sk(sk);
- const struct inet6_skb_parm *opt = IP6CB(skb);
if (np->rxopt.all) {
if ((opt->hop && (np->rxopt.bits.hopopts ||
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index c643dc907ce7..9a2838e93cc5 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -203,7 +203,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->ir_num = ntohs(th->dest);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
- if (ipv6_opt_accepted(sk, skb) ||
+ if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
atomic_inc(&skb->users);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index de51a88bec6f..9400b4326f22 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -742,7 +742,8 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk,
ireq->ir_iif = inet6_iif(skb);
if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
- (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo ||
+ (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
+ np->rxopt.bits.rxinfo ||
np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
np->rxopt.bits.rxohlim || np->repflow)) {
atomic_inc(&skb->users);
@@ -1367,7 +1368,7 @@ ipv6_pktoptions:
np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
if (np->repflow)
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
- if (ipv6_opt_accepted(sk, opt_skb)) {
+ if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
skb_set_owner_r(opt_skb, sk);
opt_skb = xchg(&np->pktoptions, opt_skb);
} else {
--
2.1.0.rc2.206.gedb03e5
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 net-next 3/3] tcp: better TCP_SKB_CB layout to reduce cache line misses
2014-09-25 20:14 [PATCH v3 net-next 0/3] tcp: better TCP_SKB_CB layout Eric Dumazet
2014-09-25 20:14 ` [PATCH v3 net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo() Eric Dumazet
2014-09-25 20:14 ` [PATCH v3 net-next 2/3] ipv6: add a struct inet6_skb_parm param to ipv6_opt_accepted() Eric Dumazet
@ 2014-09-25 20:14 ` Eric Dumazet
2014-09-27 16:46 ` Eric Dumazet
2 siblings, 1 reply; 11+ messages in thread
From: Eric Dumazet @ 2014-09-25 20:14 UTC (permalink / raw)
To: David S. Miller
Cc: netdev, Yuchung Cheng, Neal Cardwell, Christoph Paasch,
Eric Dumazet
TCP maintains lists of skb in write queue, and in receive queues
(in order and out of order queues)
Scanning these lists both in input and output path usually requires
access to skb->next, TCP_SKB_CB(skb)->seq, and TCP_SKB_CB(skb)->end_seq
These fields are currently in two different cache lines, meaning we
waste lot of memory bandwidth when these queues are big and flows
have either packet drops or packet reorders.
We can move TCP_SKB_CB(skb)->header at the end of TCP_SKB_CB, because
this header is not used in fast path. This allows TCP to search much faster
in the skb lists.
Even with regular flows, we save one cache line miss in fast path.
Thanks to Christoph Paasch for noticing we need to cleanup
skb->cb[] (IPCB/IP6CB) before entering IP stack in tx path,
and that I forgot IPCB use in tcp_v4_hnd_req() and tcp_v4_save_options().
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/tcp.h | 12 ++++++------
net/ipv4/tcp_ipv4.c | 19 ++++++++++++-------
net/ipv4/tcp_output.c | 5 +++++
net/ipv6/tcp_ipv6.c | 7 +++++++
4 files changed, 30 insertions(+), 13 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a4201ef216e8..4dc6641ee990 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -696,12 +696,6 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
* If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
*/
struct tcp_skb_cb {
- union {
- struct inet_skb_parm h4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_skb_parm h6;
-#endif
- } header; /* For incoming frames */
__u32 seq; /* Starting sequence number */
__u32 end_seq; /* SEQ + FIN + SYN + datalen */
__u32 tcp_tw_isn; /* isn chosen by tcp_timewait_state_process() */
@@ -720,6 +714,12 @@ struct tcp_skb_cb {
__u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
/* 1 byte hole */
__u32 ack_seq; /* Sequence number ACK'd */
+ union {
+ struct inet_skb_parm h4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_skb_parm h6;
+#endif
+ } header; /* For incoming frames */
};
#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 28ab90382c01..4af25fa4c059 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -886,18 +886,16 @@ EXPORT_SYMBOL(tcp_syn_flood_action);
*/
static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
{
- const struct ip_options *opt = &(IPCB(skb)->opt);
+ const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4;
struct ip_options_rcu *dopt = NULL;
if (opt && opt->optlen) {
int opt_size = sizeof(*dopt) + opt->optlen;
dopt = kmalloc(opt_size, GFP_ATOMIC);
- if (dopt) {
- if (ip_options_echo(&dopt->opt, skb)) {
- kfree(dopt);
- dopt = NULL;
- }
+ if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) {
+ kfree(dopt);
+ dopt = NULL;
}
}
return dopt;
@@ -1431,7 +1429,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
#ifdef CONFIG_SYN_COOKIES
if (!th->syn)
- sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
+ sk = cookie_v4_check(sk, skb, &TCP_SKB_CB(skb)->header.h4);
#endif
return sk;
}
@@ -1636,6 +1634,13 @@ int tcp_v4_rcv(struct sk_buff *skb)
th = tcp_hdr(skb);
iph = ip_hdr(skb);
+ /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+ * barrier() makes sure compiler wont play fool^Waliasing games.
+ */
+ memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
+ sizeof(struct inet_skb_parm));
+ barrier();
+
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff * 4);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8c61a7c0c889..6c7949cafd99 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -974,6 +974,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
/* Our usage of tstamp should remain private */
skb->tstamp.tv64 = 0;
+
+ /* Cleanup our debris for IP stacks */
+ memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
+ sizeof(struct inet6_skb_parm)));
+
err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
if (likely(err <= 0))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9400b4326f22..132bac137aed 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1412,6 +1412,13 @@ static int tcp_v6_rcv(struct sk_buff *skb)
th = tcp_hdr(skb);
hdr = ipv6_hdr(skb);
+ /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+ * barrier() makes sure compiler wont play fool^Waliasing games.
+ */
+ memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
+ sizeof(struct inet6_skb_parm));
+ barrier();
+
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff*4);
--
2.1.0.rc2.206.gedb03e5
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v3 net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo()
2014-09-25 20:14 ` [PATCH v3 net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo() Eric Dumazet
@ 2014-09-25 20:30 ` Joe Perches
2014-09-25 20:38 ` Eric Dumazet
0 siblings, 1 reply; 11+ messages in thread
From: Joe Perches @ 2014-09-25 20:30 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S. Miller, netdev, Yuchung Cheng, Neal Cardwell,
Christoph Paasch
On Thu, 2014-09-25 at 13:14 -0700, Eric Dumazet wrote:
> ip_options_echo() assumes struct ip_options is provided in &IPCB(skb)->opt
> Lets break this assumption, but provide a helper to not change all call points.
[]
> diff --git a/include/net/ip.h b/include/net/ip.h
[]
> @@ -511,7 +513,14 @@ int ip_forward(struct sk_buff *skb);
>
> void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
> __be32 daddr, struct rtable *rt, int is_frag);
> -int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb);
> +
> +int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
> + const struct ip_options *sopt);
Thanks Eric.
Unrelated:
I wonder how much effort, if any, should be made to convert
struct sk_buff * to const struct sk_buff * where appropriate.
For instance:
This __ip_options_echo could use const struct sk_buff *skb
if fib_compute_spec_dst was changed to const struct sk_buff *skb.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3 net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo()
2014-09-25 20:30 ` Joe Perches
@ 2014-09-25 20:38 ` Eric Dumazet
2014-09-25 20:49 ` Joe Perches
0 siblings, 1 reply; 11+ messages in thread
From: Eric Dumazet @ 2014-09-25 20:38 UTC (permalink / raw)
To: Joe Perches
Cc: Eric Dumazet, David S. Miller, netdev, Yuchung Cheng,
Neal Cardwell, Christoph Paasch
On Thu, 2014-09-25 at 13:30 -0700, Joe Perches wrote:
> Unrelated:
>
> I wonder how much effort, if any, should be made to convert
> struct sk_buff * to const struct sk_buff * where appropriate.
>
> For instance:
>
> This __ip_options_echo could use const struct sk_buff *skb
> if fib_compute_spec_dst was changed to const struct sk_buff *skb.
Well, this seems something certainly doable, as a follow up ;)
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3 net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo()
2014-09-25 20:38 ` Eric Dumazet
@ 2014-09-25 20:49 ` Joe Perches
2014-09-26 5:29 ` Julia Lawall
2014-09-26 6:21 ` Julia Lawall
0 siblings, 2 replies; 11+ messages in thread
From: Joe Perches @ 2014-09-25 20:49 UTC (permalink / raw)
To: Eric Dumazet
Cc: Eric Dumazet, David S. Miller, netdev, Yuchung Cheng,
Neal Cardwell, Christoph Paasch, Julia Lawall
On Thu, 2014-09-25 at 13:38 -0700, Eric Dumazet wrote:
> On Thu, 2014-09-25 at 13:30 -0700, Joe Perches wrote:
>
> > Unrelated:
> >
> > I wonder how much effort, if any, should be made to convert
> > struct sk_buff * to const struct sk_buff * where appropriate.
> >
> > For instance:
> >
> > This __ip_options_echo could use const struct sk_buff *skb
> > if fib_compute_spec_dst was changed to const struct sk_buff *skb.
>
> Well, this seems something certainly doable, as a follow up ;)
It's doable, but it seems a non-trivial inspection task.
I believe coccinelle does not have the ability to automate this.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3 net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo()
2014-09-25 20:49 ` Joe Perches
@ 2014-09-26 5:29 ` Julia Lawall
2014-09-26 8:08 ` Joe Perches
2014-09-26 6:21 ` Julia Lawall
1 sibling, 1 reply; 11+ messages in thread
From: Julia Lawall @ 2014-09-26 5:29 UTC (permalink / raw)
To: Joe Perches
Cc: Eric Dumazet, Eric Dumazet, David S. Miller, netdev,
Yuchung Cheng, Neal Cardwell, Christoph Paasch, Julia Lawall
On Thu, 25 Sep 2014, Joe Perches wrote:
> On Thu, 2014-09-25 at 13:38 -0700, Eric Dumazet wrote:
> > On Thu, 2014-09-25 at 13:30 -0700, Joe Perches wrote:
> >
> > > Unrelated:
> > >
> > > I wonder how much effort, if any, should be made to convert
> > > struct sk_buff * to const struct sk_buff * where appropriate.
> > >
> > > For instance:
> > >
> > > This __ip_options_echo could use const struct sk_buff *skb
> > > if fib_compute_spec_dst was changed to const struct sk_buff *skb.
> >
> > Well, this seems something certainly doable, as a follow up ;)
>
> It's doable, but it seems a non-trivial inspection task.
>
> I believe coccinelle does not have the ability to automate this.
What are the exact conditions when the change is possible?
I guess something like the value is only used for accessing its fields.
and is not passed to any other function? Here skb is passed to a function
called ip_hdr, but at least if one wanted to focus on sk_buffs then one
could treat that function as a special case.
julia
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3 net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo()
2014-09-25 20:49 ` Joe Perches
2014-09-26 5:29 ` Julia Lawall
@ 2014-09-26 6:21 ` Julia Lawall
1 sibling, 0 replies; 11+ messages in thread
From: Julia Lawall @ 2014-09-26 6:21 UTC (permalink / raw)
To: Joe Perches
Cc: Eric Dumazet, Eric Dumazet, David S. Miller, netdev,
Yuchung Cheng, Neal Cardwell, Christoph Paasch, Julia Lawall
On Thu, 25 Sep 2014, Joe Perches wrote:
> On Thu, 2014-09-25 at 13:38 -0700, Eric Dumazet wrote:
> > On Thu, 2014-09-25 at 13:30 -0700, Joe Perches wrote:
> >
> > > Unrelated:
> > >
> > > I wonder how much effort, if any, should be made to convert
> > > struct sk_buff * to const struct sk_buff * where appropriate.
> > >
> > > For instance:
> > >
> > > This __ip_options_echo could use const struct sk_buff *skb
> > > if fib_compute_spec_dst was changed to const struct sk_buff *skb.
> >
> > Well, this seems something certainly doable, as a follow up ;)
>
> It's doable, but it seems a non-trivial inspection task.
>
> I believe coccinelle does not have the ability to automate this.
For example, can this function declare skb as const? Or would one have to
know that vde_user_write does not modify the contents of skb->data as
well?
julia
static int vde_write(int fd, struct sk_buff *skb, struct uml_net_private
*lp)
{
struct vde_data *pri = (struct vde_data *) &lp->user;
if (pri->conn != NULL)
return vde_user_write((void *)pri->conn, skb->data,
skb->len);
printk(KERN_ERR "vde_write - we have no VDECONN to write to");
return -EBADF;
}
>
>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3 net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo()
2014-09-26 5:29 ` Julia Lawall
@ 2014-09-26 8:08 ` Joe Perches
0 siblings, 0 replies; 11+ messages in thread
From: Joe Perches @ 2014-09-26 8:08 UTC (permalink / raw)
To: Julia Lawall
Cc: Eric Dumazet, Eric Dumazet, David S. Miller, netdev,
Yuchung Cheng, Neal Cardwell, Christoph Paasch
On Fri, 2014-09-26 at 07:29 +0200, Julia Lawall wrote:
> On Thu, 25 Sep 2014, Joe Perches wrote:
> > On Thu, 2014-09-25 at 13:38 -0700, Eric Dumazet wrote:
> > > On Thu, 2014-09-25 at 13:30 -0700, Joe Perches wrote:
[]
> > > > I wonder how much effort, if any, should be made to convert
> > > > struct sk_buff * to const struct sk_buff * where appropriate.
> > > >
> > > > For instance:n
> > > >
> > > > This __ip_options_echo could use const struct sk_buff *skb
> > > > if fib_compute_spec_dst was changed to const struct sk_buff *skb.
> > >
> > > Well, this seems something certainly doable, as a follow up ;)
> >
> > It's doable, but it seems a non-trivial inspection task.
> >
> > I believe coccinelle does not have the ability to automate this.
>
> What are the exact conditions when the change is possible?
>
> I guess something like the value is only used for accessing its fields.
> and is not passed to any other function?
I expect the entire call tree needs to be known
and inspected for modification of fields.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3 net-next 3/3] tcp: better TCP_SKB_CB layout to reduce cache line misses
2014-09-25 20:14 ` [PATCH v3 net-next 3/3] tcp: better TCP_SKB_CB layout to reduce cache line misses Eric Dumazet
@ 2014-09-27 16:46 ` Eric Dumazet
0 siblings, 0 replies; 11+ messages in thread
From: Eric Dumazet @ 2014-09-27 16:46 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S. Miller, netdev, Yuchung Cheng, Neal Cardwell,
Christoph Paasch
On Thu, 2014-09-25 at 13:14 -0700, Eric Dumazet wrote:
> TCP maintains lists of skb in write queue, and in receive queues
> (in order and out of order queues)
>
> Scanning these lists both in input and output path usually requires
> access to skb->next, TCP_SKB_CB(skb)->seq, and TCP_SKB_CB(skb)->end_seq
>
> These fields are currently in two different cache lines, meaning we
> waste lot of memory bandwidth when these queues are big and flows
> have either packet drops or packet reorders.
>
> We can move TCP_SKB_CB(skb)->header at the end of TCP_SKB_CB, because
> this header is not used in fast path. This allows TCP to search much faster
> in the skb lists.
>
> Even with regular flows, we save one cache line miss in fast path.
>
> Thanks to Christoph Paasch for noticing we need to cleanup
> skb->cb[] (IPCB/IP6CB) before entering IP stack in tx path,
> and that I forgot IPCB use in tcp_v4_hnd_req() and tcp_v4_save_options().
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
Silly me, I'll send a v4, sorry for the noise
net/ipv4/tcp_ipv4.c: In function 'tcp_v4_save_options':
net/ipv4/tcp_ipv4.c:889:33: warning: initialization from incompatible pointer type [enabled by default]
net/ipv4/tcp_ipv4.c: In function 'tcp_v4_hnd_req':
net/ipv4/tcp_ipv4.c:1432:43: warning: passing argument 3 of 'cookie_v4_check' from incompatible pointer type [enabled by default]
In file included from net/ipv4/tcp_ipv4.c:69:0:
include/net/tcp.h:474:14: note: expected 'struct ip_options *' but argument is of type 'struct inet_skb_parm *'
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2014-09-27 16:46 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-09-25 20:14 [PATCH v3 net-next 0/3] tcp: better TCP_SKB_CB layout Eric Dumazet
2014-09-25 20:14 ` [PATCH v3 net-next 1/3] ipv4: rename ip_options_echo to __ip_options_echo() Eric Dumazet
2014-09-25 20:30 ` Joe Perches
2014-09-25 20:38 ` Eric Dumazet
2014-09-25 20:49 ` Joe Perches
2014-09-26 5:29 ` Julia Lawall
2014-09-26 8:08 ` Joe Perches
2014-09-26 6:21 ` Julia Lawall
2014-09-25 20:14 ` [PATCH v3 net-next 2/3] ipv6: add a struct inet6_skb_parm param to ipv6_opt_accepted() Eric Dumazet
2014-09-25 20:14 ` [PATCH v3 net-next 3/3] tcp: better TCP_SKB_CB layout to reduce cache line misses Eric Dumazet
2014-09-27 16:46 ` Eric Dumazet
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).