* [PATCH net-next 01/18] net: introduce struct net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
@ 2024-03-05 16:03 ` Eric Dumazet
2024-03-05 16:03 ` [PATCH net-next 02/18] net: move netdev_budget and netdev_budget to net_hotdata Eric Dumazet
` (18 subsequent siblings)
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:03 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
Instead of spreading critical networking fields
all over the place, add a custom net_hotdata
structure so that we can precisely control its layout.
In this first patch, move:
- gro_normal_batch used in rx (GRO stack)
- offload_base used in rx and tx (GRO and TSO stacks)
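The idea can be sketched in standalone userspace C (not kernel code):
scattered __read_mostly globals may land in different cache lines, while
one cache-line-aligned structure lets the fast path pull them in with a
single line fill. The 64-byte line size and the simplified list_head
below are assumptions of the sketch; only the field names mirror the patch.
#include <stdio.h>
#include <stddef.h>
/* Simplified stand-in for the kernel's struct list_head. */
struct list_head { struct list_head *next, *prev; };
/* Analogue of struct net_hotdata: read-mostly fast-path fields grouped
 * together, aligned to an assumed 64-byte cache line. */
struct hotdata_sketch {
	struct list_head offload_base;	/* rx/tx: GRO and TSO offload list */
	int gro_normal_batch;		/* rx: GRO batching threshold */
} __attribute__((aligned(64)));
static struct hotdata_sketch hot = {
	.offload_base     = { &hot.offload_base, &hot.offload_base },
	.gro_normal_batch = 8,
};
int main(void)
{
	/* Both fields fit inside the first 64 bytes, so a GRO batch check
	 * and an offload list walk share one cache line fill. */
	printf("offload_base@%zu gro_normal_batch@%zu sizeof=%zu\n",
	       offsetof(struct hotdata_sketch, offload_base),
	       offsetof(struct hotdata_sketch, gro_normal_batch),
	       sizeof(hot));
	return 0;
}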
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/linux/netdevice.h | 1 -
include/net/gro.h | 5 ++---
include/net/hotdata.h | 15 +++++++++++++++
net/core/Makefile | 1 +
net/core/gro.c | 15 ++++++---------
net/core/gso.c | 4 ++--
net/core/hotdata.c | 9 +++++++++
net/core/sysctl_net_core.c | 3 ++-
8 files changed, 37 insertions(+), 16 deletions(-)
create mode 100644 include/net/hotdata.h
create mode 100644 net/core/hotdata.c
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c41019f3417948d09ae9a50b57b856be1dc8ae42..15ce809e0541078bff7a48b8d7cb2cf2c1ac8a93 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4790,7 +4790,6 @@ void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);
extern int netdev_max_backlog;
extern int dev_rx_weight;
extern int dev_tx_weight;
-extern int gro_normal_batch;
enum {
NESTED_SYNC_IMM_BIT,
diff --git a/include/net/gro.h b/include/net/gro.h
index 2b58671a65492bf3f9dabf1e7a2d985cee007e11..d6fc8fbd37302338fc09ab01fead899002c5833f 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -9,6 +9,7 @@
#include <net/ip6_checksum.h>
#include <linux/skbuff.h>
#include <net/udp.h>
+#include <net/hotdata.h>
struct napi_gro_cb {
union {
@@ -446,7 +447,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
{
list_add_tail(&skb->list, &napi->rx_list);
napi->rx_count += segs;
- if (napi->rx_count >= READ_ONCE(gro_normal_batch))
+ if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
gro_normal_list(napi);
}
@@ -493,6 +494,4 @@ static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *
#endif
}
-extern struct list_head offload_base;
-
#endif /* _NET_IPV6_GRO_H */
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
new file mode 100644
index 0000000000000000000000000000000000000000..6ed32e4e34aa3bdc6e860f5a8a6cab69c36c7fad
--- /dev/null
+++ b/include/net/hotdata.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_HOTDATA_H
+#define _NET_HOTDATA_H
+
+#include <linux/types.h>
+
+/* Read mostly data used in network fast paths. */
+struct net_hotdata {
+ struct list_head offload_base;
+ int gro_normal_batch;
+};
+
+extern struct net_hotdata net_hotdata;
+
+#endif /* _NET_HOTDATA_H */
diff --git a/net/core/Makefile b/net/core/Makefile
index 821aec06abf1460d3504de4b6b66a328bba748d8..6e6548011fae570e345717e43eb3c1a6133571c7 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -18,6 +18,7 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
obj-y += net-sysfs.o
+obj-y += hotdata.o
obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
diff --git a/net/core/gro.c b/net/core/gro.c
index 6a0edbd826a17573b51c5f71e20ff0c09364fc21..ee30d4f0c03876e78795397d1c495881a2c9e80f 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -10,9 +10,6 @@
#define GRO_MAX_HEAD (MAX_HEADER + 128)
static DEFINE_SPINLOCK(offload_lock);
-struct list_head offload_base __read_mostly = LIST_HEAD_INIT(offload_base);
-/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */
-int gro_normal_batch __read_mostly = 8;
/**
* dev_add_offload - register offload handlers
@@ -31,7 +28,7 @@ void dev_add_offload(struct packet_offload *po)
struct packet_offload *elem;
spin_lock(&offload_lock);
- list_for_each_entry(elem, &offload_base, list) {
+ list_for_each_entry(elem, &net_hotdata.offload_base, list) {
if (po->priority < elem->priority)
break;
}
@@ -55,7 +52,7 @@ EXPORT_SYMBOL(dev_add_offload);
*/
static void __dev_remove_offload(struct packet_offload *po)
{
- struct list_head *head = &offload_base;
+ struct list_head *head = &net_hotdata.offload_base;
struct packet_offload *po1;
spin_lock(&offload_lock);
@@ -235,9 +232,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
+ struct list_head *head = &net_hotdata.offload_base;
struct packet_offload *ptype;
__be16 type = skb->protocol;
- struct list_head *head = &offload_base;
int err = -ENOENT;
BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
@@ -444,7 +441,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
{
u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
struct gro_list *gro_list = &napi->gro_hash[bucket];
- struct list_head *head = &offload_base;
+ struct list_head *head = &net_hotdata.offload_base;
struct packet_offload *ptype;
__be16 type = skb->protocol;
struct sk_buff *pp = NULL;
@@ -550,7 +547,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
struct packet_offload *gro_find_receive_by_type(__be16 type)
{
- struct list_head *offload_head = &offload_base;
+ struct list_head *offload_head = &net_hotdata.offload_base;
struct packet_offload *ptype;
list_for_each_entry_rcu(ptype, offload_head, list) {
@@ -564,7 +561,7 @@ EXPORT_SYMBOL(gro_find_receive_by_type);
struct packet_offload *gro_find_complete_by_type(__be16 type)
{
- struct list_head *offload_head = &offload_base;
+ struct list_head *offload_head = &net_hotdata.offload_base;
struct packet_offload *ptype;
list_for_each_entry_rcu(ptype, offload_head, list) {
diff --git a/net/core/gso.c b/net/core/gso.c
index 9e1803bfc9c6cac2fe7054661f8995909a6c28d9..bcd156372f4df080f83cc45fc96df1789125a8ae 100644
--- a/net/core/gso.c
+++ b/net/core/gso.c
@@ -17,7 +17,7 @@ struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
struct packet_offload *ptype;
rcu_read_lock();
- list_for_each_entry_rcu(ptype, &offload_base, list) {
+ list_for_each_entry_rcu(ptype, &net_hotdata.offload_base, list) {
if (ptype->type == type && ptype->callbacks.gso_segment) {
segs = ptype->callbacks.gso_segment(skb, features);
break;
@@ -48,7 +48,7 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
__skb_pull(skb, vlan_depth);
rcu_read_lock();
- list_for_each_entry_rcu(ptype, &offload_base, list) {
+ list_for_each_entry_rcu(ptype, &net_hotdata.offload_base, list) {
if (ptype->type == type && ptype->callbacks.gso_segment) {
segs = ptype->callbacks.gso_segment(skb, features);
break;
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
new file mode 100644
index 0000000000000000000000000000000000000000..abb8ad19d59acc0d7d6e1b06f4506afa42bde44b
--- /dev/null
+++ b/net/core/hotdata.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <net/hotdata.h>
+#include <linux/cache.h>
+#include <linux/list.h>
+
+struct net_hotdata net_hotdata __cacheline_aligned = {
+ .offload_base = LIST_HEAD_INIT(net_hotdata.offload_base),
+ .gro_normal_batch = 8,
+};
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 986f15e5d6c41250c8b9099fc1d2883112e77ffb..0eb1242eabbe0d3ea58886b1db409c9d991ac672 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -23,6 +23,7 @@
#include <net/net_ratelimit.h>
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
+#include <net/hotdata.h>
#include "dev.h"
@@ -632,7 +633,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "gro_normal_batch",
- .data = &gro_normal_batch,
+ .data = &net_hotdata.gro_normal_batch,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
--
2.44.0.278.ge034bb2e1d-goog
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH net-next 02/18] net: move netdev_budget and netdev_budget to net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
2024-03-05 16:03 ` [PATCH net-next 01/18] net: introduce struct net_hotdata Eric Dumazet
@ 2024-03-05 16:03 ` Eric Dumazet
2024-03-08 2:26 ` Andrew Lunn
2024-03-05 16:03 ` [PATCH net-next 03/18] net: move netdev_tstamp_prequeue into net_hotdata Eric Dumazet
` (17 subsequent siblings)
19 siblings, 1 reply; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:03 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
netdev_budget and netdev_budget are used in rx path (net_rx_action())
Move them into net_hotdata for better cache locality.
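In condensed form, the two values bound the softirq loop in net_rx_action():
it stops once the packet budget is spent or the time limit is reached.
A standalone userspace sketch of that loop shape follows; the poll stand-in,
the 64-packets-per-poll figure and the 8000 usecs value (2 jiffies at HZ=250)
are assumptions of the sketch, only the budget of 300 comes from the patch.
#include <stdio.h>
#include <time.h>
/* Stand-ins for the sysctl-backed fields moved into net_hotdata. */
static int netdev_budget = 300;		/* max packets per softirq run */
static long netdev_budget_usecs = 8000;	/* max time per softirq run */
static long long now_usecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000LL + ts.tv_nsec / 1000;
}
/* Pretend each NAPI poll round consumes up to 64 packets. */
static int fake_napi_poll(void) { return 64; }
int main(void)
{
	long long time_limit = now_usecs() + netdev_budget_usecs;
	int budget = netdev_budget;
	int rounds = 0;

	/* Same loop shape as net_rx_action(): stop on either limit. */
	while (budget > 0 && now_usecs() < time_limit) {
		budget -= fake_napi_poll();
		rounds++;
	}
	printf("stopped after %d polls, budget left %d\n", rounds, budget);
	return 0;
}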
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/hotdata.h | 2 ++
net/core/dev.c | 7 ++-----
net/core/dev.h | 2 --
net/core/hotdata.c | 6 ++++++
net/core/sysctl_net_core.c | 4 ++--
5 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 6ed32e4e34aa3bdc6e860f5a8a6cab69c36c7fad..72170223385ebe65cce42f762b3686c072291d36 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -8,6 +8,8 @@
struct net_hotdata {
struct list_head offload_base;
int gro_normal_batch;
+ int netdev_budget;
+ int netdev_budget_usecs;
};
extern struct net_hotdata net_hotdata;
diff --git a/net/core/dev.c b/net/core/dev.c
index fe054cbd41e92cbca87f1c0640c6ebe4fb6b2d86..0102a1810e7b148f465b87886b743e3d12c0e578 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4410,9 +4410,6 @@ EXPORT_SYMBOL(netdev_max_backlog);
int netdev_tstamp_prequeue __read_mostly = 1;
unsigned int sysctl_skb_defer_max __read_mostly = 64;
-int netdev_budget __read_mostly = 300;
-/* Must be at least 2 jiffes to guarantee 1 jiffy timeout */
-unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
int weight_p __read_mostly = 64; /* old backlog weight */
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
@@ -6790,8 +6787,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies +
- usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
- int budget = READ_ONCE(netdev_budget);
+ usecs_to_jiffies(READ_ONCE(net_hotdata.netdev_budget_usecs));
+ int budget = READ_ONCE(net_hotdata.netdev_budget);
LIST_HEAD(list);
LIST_HEAD(repoll);
diff --git a/net/core/dev.h b/net/core/dev.h
index 45892267848d7a35a09aea95f04cfd9b72204d3c..9a6170530850c78508f9234ec82b174f4bf5a4a3 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -38,8 +38,6 @@ int dev_addr_init(struct net_device *dev);
void dev_addr_check(struct net_device *dev);
/* sysctls not referred to from outside net/core/ */
-extern int netdev_budget;
-extern unsigned int netdev_budget_usecs;
extern unsigned int sysctl_skb_defer_max;
extern int netdev_tstamp_prequeue;
extern int netdev_unregister_timeout_secs;
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index abb8ad19d59acc0d7d6e1b06f4506afa42bde44b..907d69120397dfb8d5a901912b72580fe256c762 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -1,9 +1,15 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <net/hotdata.h>
#include <linux/cache.h>
+#include <linux/jiffies.h>
#include <linux/list.h>
+
struct net_hotdata net_hotdata __cacheline_aligned = {
.offload_base = LIST_HEAD_INIT(net_hotdata.offload_base),
.gro_normal_batch = 8,
+
+ .netdev_budget = 300,
+ /* Must be at least 2 jiffes to guarantee 1 jiffy timeout */
+ .netdev_budget_usecs = 2 * USEC_PER_SEC / HZ,
};
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 0eb1242eabbe0d3ea58886b1db409c9d991ac672..a9c2d798b219506da75a5d0a30d490ff4011d668 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -577,7 +577,7 @@ static struct ctl_table net_core_table[] = {
#endif
{
.procname = "netdev_budget",
- .data = &netdev_budget,
+ .data = &net_hotdata.netdev_budget,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
@@ -600,7 +600,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "netdev_budget_usecs",
- .data = &netdev_budget_usecs,
+ .data = &net_hotdata.netdev_budget_usecs,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
--
2.44.0.278.ge034bb2e1d-goog
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH net-next 02/18] net: move netdev_budget and netdev_budget to net_hotdata
2024-03-05 16:03 ` [PATCH net-next 02/18] net: move netdev_budget and netdev_budget to net_hotdata Eric Dumazet
@ 2024-03-08 2:26 ` Andrew Lunn
0 siblings, 0 replies; 24+ messages in thread
From: Andrew Lunn @ 2024-03-08 2:26 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, netdev,
David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet
On Tue, Mar 05, 2024 at 04:03:57PM +0000, Eric Dumazet wrote:
> netdev_budget and netdev_budget are used in rx path (net_rx_action())
I think one of these is missing _usecs ?
Andrew
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH net-next 03/18] net: move netdev_tstamp_prequeue into net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
2024-03-05 16:03 ` [PATCH net-next 01/18] net: introduce struct net_hotdata Eric Dumazet
2024-03-05 16:03 ` [PATCH net-next 02/18] net: move netdev_budget and netdev_budget to net_hotdata Eric Dumazet
@ 2024-03-05 16:03 ` Eric Dumazet
2024-03-05 16:03 ` [PATCH net-next 04/18] net: move ptype_all " Eric Dumazet
` (16 subsequent siblings)
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:03 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
netdev_tstamp_prequeue is used in rx path.
Move it to net_hotdata for better cache locality.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/hotdata.h | 1 +
net/core/dev.c | 10 +++++-----
net/core/dev.h | 1 -
net/core/hotdata.c | 2 ++
net/core/sysctl_net_core.c | 2 +-
5 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 72170223385ebe65cce42f762b3686c072291d36..149e56528537d8ed3365e46d6dc96e39c73a733a 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -10,6 +10,7 @@ struct net_hotdata {
int gro_normal_batch;
int netdev_budget;
int netdev_budget_usecs;
+ int tstamp_prequeue;
};
extern struct net_hotdata net_hotdata;
diff --git a/net/core/dev.c b/net/core/dev.c
index 0102a1810e7b148f465b87886b743e3d12c0e578..53ebdb55e8b7c0a6522eb3fdbb7bdd00948eb9a5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4408,7 +4408,6 @@ EXPORT_SYMBOL(__dev_direct_xmit);
int netdev_max_backlog __read_mostly = 1000;
EXPORT_SYMBOL(netdev_max_backlog);
-int netdev_tstamp_prequeue __read_mostly = 1;
unsigned int sysctl_skb_defer_max __read_mostly = 64;
int weight_p __read_mostly = 64; /* old backlog weight */
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
@@ -5052,7 +5051,7 @@ static int netif_rx_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
+ net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue), skb);
trace_netif_rx(skb);
@@ -5344,7 +5343,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
int ret = NET_RX_DROP;
__be16 type;
- net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
+ net_timestamp_check(!READ_ONCE(net_hotdata.tstamp_prequeue), skb);
trace_netif_receive_skb(skb);
@@ -5728,7 +5727,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
+ net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue), skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
@@ -5758,7 +5757,8 @@ void netif_receive_skb_list_internal(struct list_head *head)
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
- net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
+ net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue),
+ skb);
skb_list_del_init(skb);
if (!skb_defer_rx_timestamp(skb))
list_add_tail(&skb->list, &sublist);
diff --git a/net/core/dev.h b/net/core/dev.h
index 9a6170530850c78508f9234ec82b174f4bf5a4a3..2bcaf8eee50c179db2ca59880521b9be6ecd45c8 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -39,7 +39,6 @@ void dev_addr_check(struct net_device *dev);
/* sysctls not referred to from outside net/core/ */
extern unsigned int sysctl_skb_defer_max;
-extern int netdev_tstamp_prequeue;
extern int netdev_unregister_timeout_secs;
extern int weight_p;
extern int dev_weight_rx_bias;
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index 907d69120397dfb8d5a901912b72580fe256c762..087c4c84987df07f11a87112a778a5cac608a654 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -12,4 +12,6 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
.netdev_budget = 300,
/* Must be at least 2 jiffes to guarantee 1 jiffy timeout */
.netdev_budget_usecs = 2 * USEC_PER_SEC / HZ,
+
+ .tstamp_prequeue = 1,
};
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a9c2d798b219506da75a5d0a30d490ff4011d668..bddd07da099886f2747f2ac4ba39a482b0f4231d 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -499,7 +499,7 @@ static struct ctl_table net_core_table[] = {
#endif
{
.procname = "netdev_tstamp_prequeue",
- .data = &netdev_tstamp_prequeue,
+ .data = &net_hotdata.tstamp_prequeue,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
--
2.44.0.278.ge034bb2e1d-goog
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH net-next 04/18] net: move ptype_all into net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (2 preceding siblings ...)
2024-03-05 16:03 ` [PATCH net-next 03/18] net: move netdev_tstamp_prequeue into net_hotdata Eric Dumazet
@ 2024-03-05 16:03 ` Eric Dumazet
2024-03-05 16:04 ` [PATCH net-next 05/18] net: move netdev_max_backlog to net_hotdata Eric Dumazet
` (15 subsequent siblings)
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:03 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
ptype_all is used in rx/tx fast paths.
Move it to net_hotdata for better cache locality.
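As a reminder of who ends up on this list: any packet_type registered for
ETH_P_ALL without a bound device is linked onto it by dev_add_pack() via
ptype_head() (see the hunk below). A hypothetical out-of-tree tap module,
sketched against the in-tree API (names tap_pt/tap_rcv are made up):
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>
/* Hypothetical drop-everything tap, only to show how an ETH_P_ALL
 * packet_type with no bound device lands on net_hotdata.ptype_all. */
static int tap_rcv(struct sk_buff *skb, struct net_device *dev,
		   struct packet_type *pt, struct net_device *orig_dev)
{
	kfree_skb(skb);		/* taps own their copy; just drop it */
	return 0;
}
static struct packet_type tap_pt __read_mostly = {
	.type = htons(ETH_P_ALL),	/* ptype_head() -> ptype_all */
	.func = tap_rcv,		/* .dev left NULL: global tap */
};
static int __init tap_init(void)
{
	dev_add_pack(&tap_pt);
	return 0;
}
static void __exit tap_exit(void)
{
	dev_remove_pack(&tap_pt);
}
module_init(tap_init);
module_exit(tap_exit);
MODULE_LICENSE("GPL");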
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/linux/netdevice.h | 1 -
include/net/hotdata.h | 1 +
net/core/dev.c | 16 +++++++---------
net/core/hotdata.c | 1 +
net/core/net-procfs.c | 7 ++++---
5 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 15ce809e0541078bff7a48b8d7cb2cf2c1ac8a93..e2e7d5a7ef34de165cd293eb71800e1e6b450432 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -5300,7 +5300,6 @@ static inline const char *netdev_reg_state(const struct net_device *dev)
#define PTYPE_HASH_SIZE (16)
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
-extern struct list_head ptype_all __read_mostly;
extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
extern struct net_device *blackhole_netdev;
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 149e56528537d8ed3365e46d6dc96e39c73a733a..d462cb8f16bad459b439c566274c01a0f18a95d0 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -7,6 +7,7 @@
/* Read mostly data used in network fast paths. */
struct net_hotdata {
struct list_head offload_base;
+ struct list_head ptype_all;
int gro_normal_batch;
int netdev_budget;
int netdev_budget_usecs;
diff --git a/net/core/dev.c b/net/core/dev.c
index 53ebdb55e8b7c0a6522eb3fdbb7bdd00948eb9a5..119b7004a8e51ea6785c60e558988d369eec8935 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -161,7 +161,6 @@
static DEFINE_SPINLOCK(ptype_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
-struct list_head ptype_all __read_mostly; /* Taps */
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_extack(unsigned long val,
@@ -540,7 +539,7 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
if (pt->type == htons(ETH_P_ALL))
- return pt->dev ? &pt->dev->ptype_all : &ptype_all;
+ return pt->dev ? &pt->dev->ptype_all : &net_hotdata.ptype_all;
else
return pt->dev ? &pt->dev->ptype_specific :
&ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
@@ -2226,7 +2225,8 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
*/
bool dev_nit_active(struct net_device *dev)
{
- return !list_empty(&ptype_all) || !list_empty(&dev->ptype_all);
+ return !list_empty(&net_hotdata.ptype_all) ||
+ !list_empty(&dev->ptype_all);
}
EXPORT_SYMBOL_GPL(dev_nit_active);
@@ -2237,10 +2237,9 @@ EXPORT_SYMBOL_GPL(dev_nit_active);
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
- struct packet_type *ptype;
+ struct list_head *ptype_list = &net_hotdata.ptype_all;
+ struct packet_type *ptype, *pt_prev = NULL;
struct sk_buff *skb2 = NULL;
- struct packet_type *pt_prev = NULL;
- struct list_head *ptype_list = &ptype_all;
rcu_read_lock();
again:
@@ -2286,7 +2285,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
pt_prev = ptype;
}
- if (ptype_list == &ptype_all) {
+ if (ptype_list == &net_hotdata.ptype_all) {
ptype_list = &dev->ptype_all;
goto again;
}
@@ -5387,7 +5386,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
if (pfmemalloc)
goto skip_taps;
- list_for_each_entry_rcu(ptype, &ptype_all, list) {
+ list_for_each_entry_rcu(ptype, &net_hotdata.ptype_all, list) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
@@ -11771,7 +11770,6 @@ static int __init net_dev_init(void)
if (netdev_kobject_init())
goto out;
- INIT_LIST_HEAD(&ptype_all);
for (i = 0; i < PTYPE_HASH_SIZE; i++)
INIT_LIST_HEAD(&ptype_base[i]);
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index 087c4c84987df07f11a87112a778a5cac608a654..29fcfe89fd9a697120f826dbe2fd36a1617581a1 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -7,6 +7,7 @@
struct net_hotdata net_hotdata __cacheline_aligned = {
.offload_base = LIST_HEAD_INIT(net_hotdata.offload_base),
+ .ptype_all = LIST_HEAD_INIT(net_hotdata.ptype_all),
.gro_normal_batch = 8,
.netdev_budget = 300,
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 2e4e96d30ee1a7a51e49587378aab47aed1290da..a97eceb84e61ec347ad132ff0f22c8ce82f12e90 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -3,6 +3,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/wext.h>
+#include <net/hotdata.h>
#include "dev.h"
@@ -183,7 +184,7 @@ static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
}
}
- list_for_each_entry_rcu(pt, &ptype_all, list) {
+ list_for_each_entry_rcu(pt, &net_hotdata.ptype_all, list) {
if (i == pos)
return pt;
++i;
@@ -231,13 +232,13 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
}
- nxt = ptype_all.next;
+ nxt = net_hotdata.ptype_all.next;
goto ptype_all;
}
if (pt->type == htons(ETH_P_ALL)) {
ptype_all:
- if (nxt != &ptype_all)
+ if (nxt != &net_hotdata.ptype_all)
goto found;
hash = 0;
nxt = ptype_base[0].next;
--
2.44.0.278.ge034bb2e1d-goog
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH net-next 05/18] net: move netdev_max_backlog to net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (3 preceding siblings ...)
2024-03-05 16:03 ` [PATCH net-next 04/18] net: move ptype_all " Eric Dumazet
@ 2024-03-05 16:04 ` Eric Dumazet
2024-03-05 16:04 ` [PATCH net-next 06/18] net: move ip_packet_offload and ipv6_packet_offload " Eric Dumazet
` (14 subsequent siblings)
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
netdev_max_backlog is used in the rx fast path.
Move it to net_hotdata for better cache locality.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/linux/netdevice.h | 1 -
include/net/hotdata.h | 1 +
net/core/dev.c | 8 +++-----
net/core/gro_cells.c | 3 ++-
net/core/hotdata.c | 2 ++
net/core/sysctl_net_core.c | 2 +-
net/xfrm/espintcp.c | 4 +++-
net/xfrm/xfrm_input.c | 3 ++-
8 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e2e7d5a7ef34de165cd293eb71800e1e6b450432..044d6f5b2ace3e2decd4296e01c8d3e200c6c7dc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4787,7 +4787,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
const struct pcpu_sw_netstats __percpu *netstats);
void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);
-extern int netdev_max_backlog;
extern int dev_rx_weight;
extern int dev_tx_weight;
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index d462cb8f16bad459b439c566274c01a0f18a95d0..dc50b200a94b6b935cd79d8e0406a61209fdc68e 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -12,6 +12,7 @@ struct net_hotdata {
int netdev_budget;
int netdev_budget_usecs;
int tstamp_prequeue;
+ int max_backlog;
};
extern struct net_hotdata net_hotdata;
diff --git a/net/core/dev.c b/net/core/dev.c
index 119b7004a8e51ea6785c60e558988d369eec8935..1b112c4db983c2d7cd280bc8c2ebc621ea3c6145 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4404,9 +4404,6 @@ EXPORT_SYMBOL(__dev_direct_xmit);
* Receiver routines
*************************************************************************/
-int netdev_max_backlog __read_mostly = 1000;
-EXPORT_SYMBOL(netdev_max_backlog);
-
unsigned int sysctl_skb_defer_max __read_mostly = 64;
int weight_p __read_mostly = 64; /* old backlog weight */
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
@@ -4713,7 +4710,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
struct softnet_data *sd;
unsigned int old_flow, new_flow;
- if (qlen < (READ_ONCE(netdev_max_backlog) >> 1))
+ if (qlen < (READ_ONCE(net_hotdata.max_backlog) >> 1))
return false;
sd = this_cpu_ptr(&softnet_data);
@@ -4761,7 +4758,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
if (!netif_running(skb->dev))
goto drop;
qlen = skb_queue_len(&sd->input_pkt_queue);
- if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) {
+ if (qlen <= READ_ONCE(net_hotdata.max_backlog) &&
+ !skb_flow_limit(skb, qlen)) {
if (qlen) {
enqueue:
__skb_queue_tail(&sd->input_pkt_queue, skb);
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index ed5ec5de47f670753924bd0c72db1e3ceb9b9e7a..ff8e5b64bf6b76451a69e3eae132b593c60ee204 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -3,6 +3,7 @@
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <net/gro_cells.h>
+#include <net/hotdata.h>
struct gro_cell {
struct sk_buff_head napi_skbs;
@@ -26,7 +27,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
cell = this_cpu_ptr(gcells->cells);
- if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(netdev_max_backlog)) {
+ if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(net_hotdata.max_backlog)) {
drop:
dev_core_stats_rx_dropped_inc(dev);
kfree_skb(skb);
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index 29fcfe89fd9a697120f826dbe2fd36a1617581a1..35ed5a83ecc7ebda513fe4fafc596e053f0252c5 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -15,4 +15,6 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
.netdev_budget_usecs = 2 * USEC_PER_SEC / HZ,
.tstamp_prequeue = 1,
+ .max_backlog = 1000,
};
+EXPORT_SYMBOL(net_hotdata);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index bddd07da099886f2747f2ac4ba39a482b0f4231d..8eaeeb289914258f90cf940e906d5c6be0cc0cd6 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -440,7 +440,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "netdev_max_backlog",
- .data = &netdev_max_backlog,
+ .data = &net_hotdata.max_backlog,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index d3b3f9e720b3b6c2a4ea89df4257a564100b2c4b..fe82e2d073006e5ab1b03868c851147c0422d26d 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -10,6 +10,7 @@
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
+#include <net/hotdata.h>
static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb,
struct sock *sk)
@@ -169,7 +170,8 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
- if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog))
+ if (skb_queue_len(&ctx->out_queue) >=
+ READ_ONCE(net_hotdata.max_backlog))
return -ENOBUFS;
__skb_queue_tail(&ctx->out_queue, skb);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index bd4ce21d76d7551d8f0a4a11f4b75705a7f634c5..161f535c8b9495b01f6d9689e14c40e5c0885968 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -21,6 +21,7 @@
#include <net/ip_tunnels.h>
#include <net/ip6_tunnel.h>
#include <net/dst_metadata.h>
+#include <net/hotdata.h>
#include "xfrm_inout.h"
@@ -764,7 +765,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
trans = this_cpu_ptr(&xfrm_trans_tasklet);
- if (skb_queue_len(&trans->queue) >= READ_ONCE(netdev_max_backlog))
+ if (skb_queue_len(&trans->queue) >= READ_ONCE(net_hotdata.max_backlog))
return -ENOBUFS;
BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb));
--
2.44.0.278.ge034bb2e1d-goog
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH net-next 06/18] net: move ip_packet_offload and ipv6_packet_offload to net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (4 preceding siblings ...)
2024-03-05 16:04 ` [PATCH net-next 05/18] net: move netdev_max_backlog to net_hotdata Eric Dumazet
@ 2024-03-05 16:04 ` Eric Dumazet
2024-03-06 6:08 ` kernel test robot
2024-03-05 16:04 ` [PATCH net-next 07/18] net: move tcpv4_offload and tcpv6_offload " Eric Dumazet
` (13 subsequent siblings)
19 siblings, 1 reply; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
These structures are used in GRO and GSO paths.
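Beyond the move itself, the pattern changes from file-scope static
initializers to C99 compound-literal assignment at init time (see the
af_inet.c hunk below). A standalone userspace sketch of that assignment
pattern, with made-up stand-in types and callbacks:
#include <stdio.h>
/* Made-up stand-ins for struct packet_offload and its callbacks. */
struct offload_callbacks { void (*gro_receive)(void); void (*gro_complete)(void); };
struct packet_offload_sketch { unsigned short type; struct offload_callbacks callbacks; };
/* Analogue of the net_hotdata slot the patch assigns into. */
static struct { struct packet_offload_sketch ip_packet_offload; } hot;
static void rx(void)       { puts("gro_receive"); }
static void complete(void) { puts("gro_complete"); }
int main(void)
{
	/* Same shape as ipv4_offload_init() after the patch: fill the
	 * embedded slot with a compound literal instead of keeping a
	 * separate static object, then register it. */
	hot.ip_packet_offload = (struct packet_offload_sketch) {
		.type = 0x0800,			/* ETH_P_IP */
		.callbacks = {
			.gro_receive  = rx,
			.gro_complete = complete,
		},
	};
	hot.ip_packet_offload.callbacks.gro_receive();
	return 0;
}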
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/hotdata.h | 7 +++++++
net/ipv4/af_inet.c | 18 +++++++++---------
net/ipv6/ip6_offload.c | 18 +++++++++---------
3 files changed, 25 insertions(+), 18 deletions(-)
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index dc50b200a94b6b935cd79d8e0406a61209fdc68e..ec752d234c5ed4b9c110c9e61c143fe8fc27089e 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -3,9 +3,16 @@
#define _NET_HOTDATA_H
#include <linux/types.h>
+#include <linux/netdevice.h>
/* Read mostly data used in network fast paths. */
struct net_hotdata {
+#if IS_ENABLED(CONFIG_INET)
+ struct packet_offload ip_packet_offload;
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+ struct packet_offload ipv6_packet_offload;
+#endif
struct list_head offload_base;
struct list_head ptype_all;
int gro_normal_batch;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5daebdcbca326aa1fc042e1e1ff1e82a18bd283d..08dda6955562ea6b89e02b8299b03ab52b342f27 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1904,14 +1904,6 @@ static int ipv4_proc_init(void);
* IP protocol layer initialiser
*/
-static struct packet_offload ip_packet_offload __read_mostly = {
- .type = cpu_to_be16(ETH_P_IP),
- .callbacks = {
- .gso_segment = inet_gso_segment,
- .gro_receive = inet_gro_receive,
- .gro_complete = inet_gro_complete,
- },
-};
static const struct net_offload ipip_offload = {
.callbacks = {
@@ -1938,7 +1930,15 @@ static int __init ipv4_offload_init(void)
if (ipip_offload_init() < 0)
pr_crit("%s: Cannot add IPIP protocol offload\n", __func__);
- dev_add_offload(&ip_packet_offload);
+ net_hotdata.ip_packet_offload = (struct packet_offload) {
+ .type = cpu_to_be16(ETH_P_IP),
+ .callbacks = {
+ .gso_segment = inet_gso_segment,
+ .gro_receive = inet_gro_receive,
+ .gro_complete = inet_gro_complete,
+ },
+ };
+ dev_add_offload(&net_hotdata.ip_packet_offload);
return 0;
}
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index cca64c7809bee9a0360cbfab6a645d3f8d2ffea3..b41e35af69ea2835aa47d6ca01d9b109d4092462 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -419,14 +419,6 @@ static int ip4ip6_gro_complete(struct sk_buff *skb, int nhoff)
return inet_gro_complete(skb, nhoff);
}
-static struct packet_offload ipv6_packet_offload __read_mostly = {
- .type = cpu_to_be16(ETH_P_IPV6),
- .callbacks = {
- .gso_segment = ipv6_gso_segment,
- .gro_receive = ipv6_gro_receive,
- .gro_complete = ipv6_gro_complete,
- },
-};
static struct sk_buff *sit_gso_segment(struct sk_buff *skb,
netdev_features_t features)
@@ -486,7 +478,15 @@ static int __init ipv6_offload_init(void)
if (ipv6_exthdrs_offload_init() < 0)
pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
- dev_add_offload(&ipv6_packet_offload);
+ net_hotdata.ipv6_packet_offload = (struct packet_offload) {
+ .type = cpu_to_be16(ETH_P_IPV6),
+ .callbacks = {
+ .gso_segment = ipv6_gso_segment,
+ .gro_receive = ipv6_gro_receive,
+ .gro_complete = ipv6_gro_complete,
+ },
+ };
+ dev_add_offload(&net_hotdata.ipv6_packet_offload);
inet_add_offload(&sit_offload, IPPROTO_IPV6);
inet6_add_offload(&ip6ip6_offload, IPPROTO_IPV6);
--
2.44.0.278.ge034bb2e1d-goog
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH net-next 06/18] net: move ip_packet_offload and ipv6_packet_offload to net_hotdata
2024-03-05 16:04 ` [PATCH net-next 06/18] net: move ip_packet_offload and ipv6_packet_offload " Eric Dumazet
@ 2024-03-06 6:08 ` kernel test robot
2024-03-06 9:00 ` Eric Dumazet
0 siblings, 1 reply; 24+ messages in thread
From: kernel test robot @ 2024-03-06 6:08 UTC (permalink / raw)
To: Eric Dumazet, David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: llvm, oe-kbuild-all, netdev, David Ahern, Willem de Bruijn,
Soheil Hassas Yeganeh, Neal Cardwell, eric.dumazet, Eric Dumazet
Hi Eric,
kernel test robot noticed the following build errors:
[auto build test ERROR on net-next/main]
url: https://github.com/intel-lab-lkp/linux/commits/Eric-Dumazet/net-introduce-struct-net_hotdata/20240306-001407
base: net-next/main
patch link: https://lore.kernel.org/r/20240305160413.2231423-7-edumazet%40google.com
patch subject: [PATCH net-next 06/18] net: move ip_packet_offload and ipv6_packet_offload to net_hotdata
config: hexagon-defconfig (https://download.01.org/0day-ci/archive/20240306/202403061318.QMW92UEi-lkp@intel.com/config)
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project 325f51237252e6dab8e4e1ea1fa7acbb4faee1cd)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240306/202403061318.QMW92UEi-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202403061318.QMW92UEi-lkp@intel.com/
All errors (new ones prefixed by >>):
In file included from include/linux/highmem.h:12:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/hexagon/include/asm/io.h:328:
include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
573 | val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
35 | #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
| ^
In file included from net/ipv6/ip6_offload.c:9:
In file included from include/linux/netdevice.h:38:
In file included from include/net/net_namespace.h:43:
In file included from include/linux/skbuff.h:17:
In file included from include/linux/bvec.h:10:
In file included from include/linux/highmem.h:12:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/hexagon/include/asm/io.h:328:
include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
584 | __raw_writeb(value, PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
594 | __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
604 | __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
In file included from net/ipv6/ip6_offload.c:9:
In file included from include/linux/netdevice.h:45:
In file included from include/uapi/linux/neighbour.h:6:
In file included from include/linux/netlink.h:9:
In file included from include/net/scm.h:9:
In file included from include/linux/security.h:35:
include/linux/bpf.h:736:48: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_arg_type') [-Wenum-enum-conversion]
736 | ARG_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MAP_VALUE,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~~~~
include/linux/bpf.h:737:43: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_arg_type') [-Wenum-enum-conversion]
737 | ARG_PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MEM,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~
include/linux/bpf.h:738:43: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_arg_type') [-Wenum-enum-conversion]
738 | ARG_PTR_TO_CTX_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_CTX,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~
include/linux/bpf.h:739:45: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_arg_type') [-Wenum-enum-conversion]
739 | ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~
include/linux/bpf.h:740:44: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_arg_type') [-Wenum-enum-conversion]
740 | ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~
include/linux/bpf.h:741:45: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_arg_type') [-Wenum-enum-conversion]
741 | ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~
include/linux/bpf.h:745:38: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_arg_type') [-Wenum-enum-conversion]
745 | ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM,
| ~~~~~~~~~~ ^ ~~~~~~~~~~~~~~
include/linux/bpf.h:747:45: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_arg_type') [-Wenum-enum-conversion]
747 | ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~
include/linux/bpf.h:770:48: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_return_type') [-Wenum-enum-conversion]
770 | RET_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MAP_VALUE,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~~~~
include/linux/bpf.h:771:45: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_return_type') [-Wenum-enum-conversion]
771 | RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~
include/linux/bpf.h:772:47: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_return_type') [-Wenum-enum-conversion]
772 | RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~~~
include/linux/bpf.h:773:50: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_return_type') [-Wenum-enum-conversion]
773 | RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~
include/linux/bpf.h:775:49: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_return_type') [-Wenum-enum-conversion]
775 | RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MEM,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~
include/linux/bpf.h:776:45: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_return_type') [-Wenum-enum-conversion]
776 | RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~
include/linux/bpf.h:777:43: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_return_type') [-Wenum-enum-conversion]
777 | RET_PTR_TO_BTF_ID_TRUSTED = PTR_TRUSTED | RET_PTR_TO_BTF_ID,
| ~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~
include/linux/bpf.h:888:44: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_reg_type') [-Wenum-enum-conversion]
888 | PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | PTR_TO_MAP_VALUE,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~
include/linux/bpf.h:889:42: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_reg_type') [-Wenum-enum-conversion]
889 | PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCKET,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~
include/linux/bpf.h:890:46: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_reg_type') [-Wenum-enum-conversion]
890 | PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~~
include/linux/bpf.h:891:44: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_reg_type') [-Wenum-enum-conversion]
891 | PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | PTR_TO_TCP_SOCK,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~
include/linux/bpf.h:892:42: warning: bitwise operation between different enumeration types ('enum bpf_type_flag' and 'enum bpf_reg_type') [-Wenum-enum-conversion]
892 | PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | PTR_TO_BTF_ID,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~
>> net/ipv6/ip6_offload.c:481:14: error: no member named 'ipv6_packet_offload' in 'struct net_hotdata'; did you mean 'ip_packet_offload'?
481 | net_hotdata.ipv6_packet_offload = (struct packet_offload) {
| ^~~~~~~~~~~~~~~~~~~
| ip_packet_offload
include/net/hotdata.h:11:24: note: 'ip_packet_offload' declared here
11 | struct packet_offload ip_packet_offload;
| ^
net/ipv6/ip6_offload.c:489:31: error: no member named 'ipv6_packet_offload' in 'struct net_hotdata'; did you mean 'ip_packet_offload'?
489 | dev_add_offload(&net_hotdata.ipv6_packet_offload);
| ^~~~~~~~~~~~~~~~~~~
| ip_packet_offload
include/net/hotdata.h:11:24: note: 'ip_packet_offload' declared here
11 | struct packet_offload ip_packet_offload;
| ^
27 warnings and 2 errors generated.
vim +481 net/ipv6/ip6_offload.c
475
476 if (tcpv6_offload_init() < 0)
477 pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
478 if (ipv6_exthdrs_offload_init() < 0)
479 pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
480
> 481 net_hotdata.ipv6_packet_offload = (struct packet_offload) {
482 .type = cpu_to_be16(ETH_P_IPV6),
483 .callbacks = {
484 .gso_segment = ipv6_gso_segment,
485 .gro_receive = ipv6_gro_receive,
486 .gro_complete = ipv6_gro_complete,
487 },
488 };
489 dev_add_offload(&net_hotdata.ipv6_packet_offload);
490
491 inet_add_offload(&sit_offload, IPPROTO_IPV6);
492 inet6_add_offload(&ip6ip6_offload, IPPROTO_IPV6);
493 inet6_add_offload(&ip4ip6_offload, IPPROTO_IPIP);
494
495 return 0;
496 }
497
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH net-next 06/18] net: move ip_packet_offload and ipv6_packet_offload to net_hotdata
2024-03-06 6:08 ` kernel test robot
@ 2024-03-06 9:00 ` Eric Dumazet
0 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-06 9:00 UTC (permalink / raw)
To: kernel test robot
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, llvm,
oe-kbuild-all, netdev, David Ahern, Willem de Bruijn,
Soheil Hassas Yeganeh, Neal Cardwell, eric.dumazet
On Wed, Mar 6, 2024 at 7:09 AM kernel test robot <lkp@intel.com> wrote:
>
> Hi Eric,
>
> kernel test robot noticed the following build errors:
>
> [auto build test ERROR on net-next/main]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Eric-Dumazet/net-introduce-struct-net_hotdata/20240306-001407
> base: net-next/main
> patch link: https://lore.kernel.org/r/20240305160413.2231423-7-edumazet%40google.com
> patch subject: [PATCH net-next 06/18] net: move ip_packet_offload and ipv6_packet_offload to net_hotdata
> config: hexagon-defconfig (https://download.01.org/0day-ci/archive/20240306/202403061318.QMW92UEi-lkp@intel.com/config)
> compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project 325f51237252e6dab8e4e1ea1fa7acbb4faee1cd)
> reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240306/202403061318.QMW92UEi-lkp@intel.com/reproduce)
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes: https://lore.kernel.org/oe-kbuild-all/202403061318.QMW92UEi-lkp@intel.com/
Right, all the offloads need to be there if CONFIG_INET=y, regardless
of CONFIG_IPV6.
I will have changes in v2 so that the cumulative diff is:
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 7bb6e46aec8f19deff42112041feb47724cdd538..9230052a453c1b959b9a72f1d1a4878d76f243b8
100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -14,13 +14,13 @@ struct net_hotdata {
struct net_protocol tcp_protocol;
struct net_offload udpv4_offload;
struct net_protocol udp_protocol;
-#endif
-#if IS_ENABLED(CONFIG_IPV6)
struct packet_offload ipv6_packet_offload;
struct net_offload tcpv6_offload;
+#if defined (CONFIG_IPV6)
struct inet6_protocol tcpv6_protocol;
- struct net_offload udpv6_offload;
struct inet6_protocol udpv6_protocol;
+#endif
+ struct net_offload udpv6_offload;
#endif
struct list_head offload_base;
struct list_head ptype_all;
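For reference, the two kconfig guards behave differently: IS_ENABLED(CONFIG_FOO)
is true for both =y and =m, while #if defined(CONFIG_FOO) only holds for =y
because a modular build defines CONFIG_FOO_MODULE instead, and the IPv6 GRO/GSO
offload objects are built for CONFIG_INET even without IPv6 (the robot's config
above compiled ip6_offload.c with IPv6 disabled). A standalone sketch of the
macro semantics, with the placeholder trick reproduced in simplified form from
include/linux/kconfig.h and the two CONFIG defines faked by hand:
#include <stdio.h>
/* Simplified reproduction of the include/linux/kconfig.h trick; the two
 * CONFIG_* defines below fake a build with INET=y and IPV6=m. */
#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define __is_defined(x) ___is_defined(x)
#define IS_ENABLED(option) (__is_defined(option) || __is_defined(option##_MODULE))
#define CONFIG_INET 1		/* pretend INET=y */
#define CONFIG_IPV6_MODULE 1	/* pretend IPV6=m: no plain CONFIG_IPV6 */
int main(void)
{
	int is_enabled = IS_ENABLED(CONFIG_IPV6);	/* 1: y or m both count */
	int is_builtin =
#ifdef CONFIG_IPV6
		1;
#else
		0;	/* 0 here: =m defines CONFIG_IPV6_MODULE, not CONFIG_IPV6 */
#endif

	printf("IS_ENABLED(CONFIG_INET)=%d IS_ENABLED(CONFIG_IPV6)=%d defined(CONFIG_IPV6)=%d\n",
	       IS_ENABLED(CONFIG_INET), is_enabled, is_builtin);
	return 0;
}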
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH net-next 07/18] net: move tcpv4_offload and tcpv6_offload to net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (5 preceding siblings ...)
2024-03-05 16:04 ` [PATCH net-next 06/18] net: move ip_packet_offload and ipv6_packet_offload " Eric Dumazet
@ 2024-03-05 16:04 ` Eric Dumazet
2024-03-05 16:04 ` [PATCH net-next 08/18] net: move dev_tx_weight " Eric Dumazet
` (12 subsequent siblings)
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
These are used in TCP fast paths.
Move them into net_hotdata for better cache locality.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/hotdata.h | 3 +++
net/ipv4/tcp_offload.c | 17 ++++++++---------
net/ipv6/tcpv6_offload.c | 16 ++++++++--------
3 files changed, 19 insertions(+), 17 deletions(-)
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index ec752d234c5ed4b9c110c9e61c143fe8fc27089e..a4a8df3bc0dea1b4c9589bd70f7ac457ebc5b634 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -4,14 +4,17 @@
#include <linux/types.h>
#include <linux/netdevice.h>
+#include <net/protocol.h>
/* Read mostly data used in network fast paths. */
struct net_hotdata {
#if IS_ENABLED(CONFIG_INET)
struct packet_offload ip_packet_offload;
+ struct net_offload tcpv4_offload;
#endif
#if IS_ENABLED(CONFIG_IPV6)
struct packet_offload ipv6_packet_offload;
+ struct net_offload tcpv6_offload;
#endif
struct list_head offload_base;
struct list_head ptype_all;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index b955ab3b236d965a38054efa004fe12f03074c70..ebe4722bb0204433936e69724879779141288789 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -345,15 +345,14 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
return 0;
}
-static const struct net_offload tcpv4_offload = {
- .callbacks = {
- .gso_segment = tcp4_gso_segment,
- .gro_receive = tcp4_gro_receive,
- .gro_complete = tcp4_gro_complete,
- },
-};
-
int __init tcpv4_offload_init(void)
{
- return inet_add_offload(&tcpv4_offload, IPPROTO_TCP);
+ net_hotdata.tcpv4_offload = (struct net_offload) {
+ .callbacks = {
+ .gso_segment = tcp4_gso_segment,
+ .gro_receive = tcp4_gro_receive,
+ .gro_complete = tcp4_gro_complete,
+ },
+ };
+ return inet_add_offload(&net_hotdata.tcpv4_offload, IPPROTO_TCP);
}
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index bf0c957e4b5eaaabc0ac3a7e55c7de6608cec156..4b07d1e6c952957419924c83c5d3ac0e9d0b2565 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -66,15 +66,15 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
return tcp_gso_segment(skb, features);
}
-static const struct net_offload tcpv6_offload = {
- .callbacks = {
- .gso_segment = tcp6_gso_segment,
- .gro_receive = tcp6_gro_receive,
- .gro_complete = tcp6_gro_complete,
- },
-};
int __init tcpv6_offload_init(void)
{
- return inet6_add_offload(&tcpv6_offload, IPPROTO_TCP);
+ net_hotdata.tcpv6_offload = (struct net_offload) {
+ .callbacks = {
+ .gso_segment = tcp6_gso_segment,
+ .gro_receive = tcp6_gro_receive,
+ .gro_complete = tcp6_gro_complete,
+ },
+ };
+ return inet6_add_offload(&net_hotdata.tcpv6_offload, IPPROTO_TCP);
}
--
2.44.0.278.ge034bb2e1d-goog
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH net-next 08/18] net: move dev_tx_weight to net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (6 preceding siblings ...)
2024-03-05 16:04 ` [PATCH net-next 07/18] net: move tcpv4_offload and tcpv6_offload " Eric Dumazet
@ 2024-03-05 16:04 ` Eric Dumazet
2024-03-05 16:04 ` [PATCH net-next 09/18] net: move dev_rx_weight " Eric Dumazet
` (11 subsequent siblings)
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
dev_tx_weight is used in tx fast path.
Move it to net_hotdata for better cache locality.
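As a reminder of where the value comes from: dev_tx_weight is not written
directly but derived in proc_do_dev_weight() from net.core.dev_weight and the
tx bias (see the sysctl hunk below). A standalone sketch of that derivation,
using the defaults visible in net/core/dev.c:
#include <stdio.h>
/* Defaults as in net/core/dev.c: weight_p = 64, both biases = 1. */
static int weight_p = 64;
static int dev_weight_rx_bias = 1;
static int dev_weight_tx_bias = 1;
int main(void)
{
	/* Same arithmetic as proc_do_dev_weight(): a write to
	 * net.core.dev_weight recomputes both derived weights. */
	int dev_rx_weight = weight_p * dev_weight_rx_bias;
	int dev_tx_weight = weight_p * dev_weight_tx_bias;

	printf("rx quota per backlog poll: %d, tx quota per __qdisc_run(): %d\n",
	       dev_rx_weight, dev_tx_weight);
	return 0;
}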
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/linux/netdevice.h | 1 -
include/net/hotdata.h | 1 +
net/core/dev.c | 1 -
net/core/hotdata.c | 1 +
net/core/sysctl_net_core.c | 2 +-
net/sched/sch_generic.c | 3 ++-
6 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 044d6f5b2ace3e2decd4296e01c8d3e200c6c7dc..c2a735edc44be95fbd4bbd1e234d883582bfde10 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4788,7 +4788,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);
extern int dev_rx_weight;
-extern int dev_tx_weight;
enum {
NESTED_SYNC_IMM_BIT,
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index a4a8df3bc0dea1b4c9589bd70f7ac457ebc5b634..2b0eb6b7f1f2c9b1273b07e06ba0b5c12a2934bf 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -23,6 +23,7 @@ struct net_hotdata {
int netdev_budget_usecs;
int tstamp_prequeue;
int max_backlog;
+ int dev_tx_weight;
};
extern struct net_hotdata net_hotdata;
diff --git a/net/core/dev.c b/net/core/dev.c
index 1b112c4db983c2d7cd280bc8c2ebc621ea3c6145..3f8d451f42fb9ba621f1ea19e784c7f0be0bf2d8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4409,7 +4409,6 @@ int weight_p __read_mostly = 64; /* old backlog weight */
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
int dev_rx_weight __read_mostly = 64;
-int dev_tx_weight __read_mostly = 64;
/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index 35ed5a83ecc7ebda513fe4fafc596e053f0252c5..ec8c3b48e8fea57491c5870055cffb44c779db44 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -16,5 +16,6 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
.tstamp_prequeue = 1,
.max_backlog = 1000,
+ .dev_tx_weight = 64,
};
EXPORT_SYMBOL(net_hotdata);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 8eaeeb289914258f90cf940e906d5c6be0cc0cd6..a30016a8660e09db89b3153e4103c185a800a2ef 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -302,7 +302,7 @@ static int proc_do_dev_weight(struct ctl_table *table, int write,
if (!ret && write) {
weight = READ_ONCE(weight_p);
WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias);
- WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias);
+ WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias);
}
mutex_unlock(&dev_weight_mutex);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 9b3e9262040b6ef6516752c558c8997bf4054123..ff5336493777507242320d7e9214c637663f0734 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -27,6 +27,7 @@
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
+#include <net/hotdata.h>
#include <trace/events/qdisc.h>
#include <trace/events/net.h>
#include <net/xfrm.h>
@@ -409,7 +410,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
void __qdisc_run(struct Qdisc *q)
{
- int quota = READ_ONCE(dev_tx_weight);
+ int quota = READ_ONCE(net_hotdata.dev_tx_weight);
int packets;
while (qdisc_restart(q, &packets)) {
--
2.44.0.278.ge034bb2e1d-goog
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH net-next 09/18] net: move dev_rx_weight to net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (7 preceding siblings ...)
2024-03-05 16:04 ` [PATCH net-next 08/18] net: move dev_tx_weight " Eric Dumazet
@ 2024-03-05 16:04 ` Eric Dumazet
2024-03-05 16:04 ` [PATCH net-next 10/18] net: move skbuff_cache(s) " Eric Dumazet
` (10 subsequent siblings)
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
dev_rx_weight is read from process_backlog().
Move it to net_hotdata for better cache locality.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/linux/netdevice.h | 2 --
include/net/hotdata.h | 1 +
net/core/dev.c | 3 +--
net/core/hotdata.c | 1 +
net/core/sysctl_net_core.c | 2 +-
5 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c2a735edc44be95fbd4bbd1e234d883582bfde10..d3d4d1052ecb0bc704e9bb40fffbea85cfb6ab03 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4787,8 +4787,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
const struct pcpu_sw_netstats __percpu *netstats);
void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);
-extern int dev_rx_weight;
-
enum {
NESTED_SYNC_IMM_BIT,
NESTED_SYNC_TODO_BIT,
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 2b0eb6b7f1f2c9b1273b07e06ba0b5c12a2934bf..f45085eddbc997c2a9686b5b4de9da4cc8c4ede8 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -24,6 +24,7 @@ struct net_hotdata {
int tstamp_prequeue;
int max_backlog;
int dev_tx_weight;
+ int dev_rx_weight;
};
extern struct net_hotdata net_hotdata;
diff --git a/net/core/dev.c b/net/core/dev.c
index 3f8d451f42fb9ba621f1ea19e784c7f0be0bf2d8..26676dbd6c8594371a4d30e0dd95d72342e226ca 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4408,7 +4408,6 @@ unsigned int sysctl_skb_defer_max __read_mostly = 64;
int weight_p __read_mostly = 64; /* old backlog weight */
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
-int dev_rx_weight __read_mostly = 64;
/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
@@ -5978,7 +5977,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
net_rps_action_and_irq_enable(sd);
}
- napi->weight = READ_ONCE(dev_rx_weight);
+ napi->weight = READ_ONCE(net_hotdata.dev_rx_weight);
while (again) {
struct sk_buff *skb;
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index ec8c3b48e8fea57491c5870055cffb44c779db44..c8a7a451c18a383d091e413a510d84d163473f2f 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -17,5 +17,6 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
.tstamp_prequeue = 1,
.max_backlog = 1000,
.dev_tx_weight = 64,
+ .dev_rx_weight = 64,
};
EXPORT_SYMBOL(net_hotdata);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a30016a8660e09db89b3153e4103c185a800a2ef..8a4c698dad9c97636ca9cebfad925d4220e98f2a 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -301,7 +301,7 @@ static int proc_do_dev_weight(struct ctl_table *table, int write,
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write) {
weight = READ_ONCE(weight_p);
- WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias);
+ WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias);
WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias);
}
mutex_unlock(&dev_weight_mutex);
--
2.44.0.278.ge034bb2e1d-goog
* [PATCH net-next 10/18] net: move skbuff_cache(s) to net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (8 preceding siblings ...)
2024-03-05 16:04 ` [PATCH net-next 09/18] net: move dev_rx_weight " Eric Dumazet
@ 2024-03-05 16:04 ` Eric Dumazet
2024-03-05 16:04 ` [PATCH net-next 11/18] udp: move udpv4_offload and udpv6_offload " Eric Dumazet
` (9 subsequent siblings)
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
skbuff_cache, skbuff_fclone_cache and skb_small_head_cache
are used in rx/tx fast paths.
Move them to net_hotdata for better cache locality.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/linux/skbuff.h | 1 -
include/net/hotdata.h | 3 +++
kernel/bpf/cpumap.c | 4 +++-
net/bpf/test_run.c | 4 +++-
net/core/skbuff.c | 44 +++++++++++++++++++-----------------------
net/core/xdp.c | 5 +++--
6 files changed, 32 insertions(+), 29 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3013355b63f5f29acfcb0331bfd1c3308aba034d..d0508f90bed50ca66850b29383033e11985dad34 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1271,7 +1271,6 @@ static inline void consume_skb(struct sk_buff *skb)
void __consume_stateless_skb(struct sk_buff *skb);
void __kfree_skb(struct sk_buff *skb);
-extern struct kmem_cache *skbuff_cache;
void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index f45085eddbc997c2a9686b5b4de9da4cc8c4ede8..728aee1cf07c8f0d85873d912248a99e148f84b1 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -18,6 +18,9 @@ struct net_hotdata {
#endif
struct list_head offload_base;
struct list_head ptype_all;
+ struct kmem_cache *skbuff_cache;
+ struct kmem_cache *skbuff_fclone_cache;
+ struct kmem_cache *skb_small_head_cache;
int gro_normal_batch;
int netdev_budget;
int netdev_budget_usecs;
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 8a0bb80fe48a344964e4029fec5e895ee512babf..33f4246cdf0de8232bfbba6a4300f158db3ca9d5 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -24,6 +24,7 @@
#include <linux/filter.h>
#include <linux/ptr_ring.h>
#include <net/xdp.h>
+#include <net/hotdata.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
@@ -326,7 +327,8 @@ static int cpu_map_kthread_run(void *data)
/* Support running another XDP prog on this CPU */
nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats, &list);
if (nframes) {
- m = kmem_cache_alloc_bulk(skbuff_cache, gfp, nframes, skbs);
+ m = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
+ gfp, nframes, skbs);
if (unlikely(m == 0)) {
for (i = 0; i < nframes; i++)
skbs[i] = NULL; /* effect: xdp_return_frame */
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 5535f9adc6589d028138026ebff24286098dc46d..61efeadaff8db0529a62d074f441e2a7c35eaa9e 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -12,6 +12,7 @@
#include <linux/rcupdate_trace.h>
#include <linux/sched/signal.h>
#include <net/bpf_sk_storage.h>
+#include <net/hotdata.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/net_namespace.h>
@@ -254,7 +255,8 @@ static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
int i, n;
LIST_HEAD(list);
- n = kmem_cache_alloc_bulk(skbuff_cache, gfp, nframes, (void **)skbs);
+ n = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp, nframes,
+ (void **)skbs);
if (unlikely(n == 0)) {
for (i = 0; i < nframes; i++)
xdp_return_frame(frames[i]);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 43d7fc150acc9263760162c3f5778fa0a646bcc4..766219011aeaf5782df9d624696d273ef6c1577c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -69,6 +69,7 @@
#include <net/sock.h>
#include <net/checksum.h>
#include <net/gso.h>
+#include <net/hotdata.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
#include <net/mpls.h>
@@ -88,15 +89,10 @@
#include "dev.h"
#include "sock_destructor.h"
-struct kmem_cache *skbuff_cache __ro_after_init;
-static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
#ifdef CONFIG_SKB_EXTENSIONS
static struct kmem_cache *skbuff_ext_cache __ro_after_init;
#endif
-
-static struct kmem_cache *skb_small_head_cache __ro_after_init;
-
#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.
@@ -349,7 +345,7 @@ static struct sk_buff *napi_skb_cache_get(void)
struct sk_buff *skb;
if (unlikely(!nc->skb_count)) {
- nc->skb_count = kmem_cache_alloc_bulk(skbuff_cache,
+ nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
GFP_ATOMIC,
NAPI_SKB_CACHE_BULK,
nc->skb_cache);
@@ -358,7 +354,7 @@ static struct sk_buff *napi_skb_cache_get(void)
}
skb = nc->skb_cache[--nc->skb_count];
- kasan_mempool_unpoison_object(skb, kmem_cache_size(skbuff_cache));
+ kasan_mempool_unpoison_object(skb, kmem_cache_size(net_hotdata.skbuff_cache));
return skb;
}
@@ -416,7 +412,7 @@ struct sk_buff *slab_build_skb(void *data)
struct sk_buff *skb;
unsigned int size;
- skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
@@ -467,7 +463,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
struct sk_buff *skb;
- skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
@@ -578,7 +574,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
obj_size = SKB_HEAD_ALIGN(*size);
if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
!(flags & KMALLOC_NOT_NORMAL_BITS)) {
- obj = kmem_cache_alloc_node(skb_small_head_cache,
+ obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache,
flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
node);
*size = SKB_SMALL_HEAD_CACHE_SIZE;
@@ -586,7 +582,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
goto out;
/* Try again but now we are using pfmemalloc reserves */
ret_pfmemalloc = true;
- obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node);
+ obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags, node);
goto out;
}
@@ -649,7 +645,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
u8 *data;
cache = (flags & SKB_ALLOC_FCLONE)
- ? skbuff_fclone_cache : skbuff_cache;
+ ? net_hotdata.skbuff_fclone_cache : net_hotdata.skbuff_cache;
if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
gfp_mask |= __GFP_MEMALLOC;
@@ -1095,7 +1091,7 @@ static int skb_pp_frag_ref(struct sk_buff *skb)
static void skb_kfree_head(void *head, unsigned int end_offset)
{
if (end_offset == SKB_SMALL_HEAD_HEADROOM)
- kmem_cache_free(skb_small_head_cache, head);
+ kmem_cache_free(net_hotdata.skb_small_head_cache, head);
else
kfree(head);
}
@@ -1162,7 +1158,7 @@ static void kfree_skbmem(struct sk_buff *skb)
switch (skb->fclone) {
case SKB_FCLONE_UNAVAILABLE:
- kmem_cache_free(skbuff_cache, skb);
+ kmem_cache_free(net_hotdata.skbuff_cache, skb);
return;
case SKB_FCLONE_ORIG:
@@ -1183,7 +1179,7 @@ static void kfree_skbmem(struct sk_buff *skb)
if (!refcount_dec_and_test(&fclones->fclone_ref))
return;
fastpath:
- kmem_cache_free(skbuff_fclone_cache, fclones);
+ kmem_cache_free(net_hotdata.skbuff_fclone_cache, fclones);
}
void skb_release_head_state(struct sk_buff *skb)
@@ -1280,7 +1276,7 @@ static void kfree_skb_add_bulk(struct sk_buff *skb,
sa->skb_array[sa->skb_count++] = skb;
if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
- kmem_cache_free_bulk(skbuff_cache, KFREE_SKB_BULK_SIZE,
+ kmem_cache_free_bulk(net_hotdata.skbuff_cache, KFREE_SKB_BULK_SIZE,
sa->skb_array);
sa->skb_count = 0;
}
@@ -1305,7 +1301,7 @@ kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason)
}
if (sa.skb_count)
- kmem_cache_free_bulk(skbuff_cache, sa.skb_count, sa.skb_array);
+ kmem_cache_free_bulk(net_hotdata.skbuff_cache, sa.skb_count, sa.skb_array);
}
EXPORT_SYMBOL(kfree_skb_list_reason);
@@ -1467,9 +1463,9 @@ static void napi_skb_cache_put(struct sk_buff *skb)
if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++)
kasan_mempool_unpoison_object(nc->skb_cache[i],
- kmem_cache_size(skbuff_cache));
+ kmem_cache_size(net_hotdata.skbuff_cache));
- kmem_cache_free_bulk(skbuff_cache, NAPI_SKB_CACHE_HALF,
+ kmem_cache_free_bulk(net_hotdata.skbuff_cache, NAPI_SKB_CACHE_HALF,
nc->skb_cache + NAPI_SKB_CACHE_HALF);
nc->skb_count = NAPI_SKB_CACHE_HALF;
}
@@ -2066,7 +2062,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
- n = kmem_cache_alloc(skbuff_cache, gfp_mask);
+ n = kmem_cache_alloc(net_hotdata.skbuff_cache, gfp_mask);
if (!n)
return NULL;
@@ -5005,7 +5001,7 @@ static void skb_extensions_init(void) {}
void __init skb_init(void)
{
- skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache",
+ net_hotdata.skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache",
sizeof(struct sk_buff),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|
@@ -5013,7 +5009,7 @@ void __init skb_init(void)
offsetof(struct sk_buff, cb),
sizeof_field(struct sk_buff, cb),
NULL);
- skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+ net_hotdata.skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
sizeof(struct sk_buff_fclones),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
@@ -5022,7 +5018,7 @@ void __init skb_init(void)
* struct skb_shared_info is located at the end of skb->head,
* and should not be copied to/from user.
*/
- skb_small_head_cache = kmem_cache_create_usercopy("skbuff_small_head",
+ net_hotdata.skb_small_head_cache = kmem_cache_create_usercopy("skbuff_small_head",
SKB_SMALL_HEAD_CACHE_SIZE,
0,
SLAB_HWCACHE_ALIGN | SLAB_PANIC,
@@ -5895,7 +5891,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
{
if (head_stolen) {
skb_release_head_state(skb);
- kmem_cache_free(skbuff_cache, skb);
+ kmem_cache_free(net_hotdata.skbuff_cache, skb);
} else {
__kfree_skb(skb);
}
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 0e3709a29175baf3ee90fdc9cc2d97d861da11a3..41693154e426f78656cc18fc97ca7e82e648270b 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -16,6 +16,7 @@
#include <linux/bug.h>
#include <net/page_pool/helpers.h>
+#include <net/hotdata.h>
#include <net/xdp.h>
#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
#include <trace/events/xdp.h>
@@ -589,7 +590,7 @@ EXPORT_SYMBOL_GPL(xdp_warn);
int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp)
{
- n_skb = kmem_cache_alloc_bulk(skbuff_cache, gfp, n_skb, skbs);
+ n_skb = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp, n_skb, skbs);
if (unlikely(!n_skb))
return -ENOMEM;
@@ -658,7 +659,7 @@ struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
{
struct sk_buff *skb;
- skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
--
2.44.0.278.ge034bb2e1d-goog
* [PATCH net-next 11/18] udp: move udpv4_offload and udpv6_offload to net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (9 preceding siblings ...)
2024-03-05 16:04 ` [PATCH net-next 10/18] net: move skbuff_cache(s) " Eric Dumazet
@ 2024-03-05 16:04 ` Eric Dumazet
2024-03-05 16:04 ` [PATCH net-next 12/18] ipv6: move tcpv6_protocol and udpv6_protocol " Eric Dumazet
` (8 subsequent siblings)
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
These structures are used in GRO and GSO paths.
Move them to net_hotdata for better cache locality.
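A note on the pattern used here (and repeated in the next patches): a
static const ops structure becomes an ordinary field of the shared
hot-data object, filled in once at init time with a C99 compound
literal. The sketch below is userspace-only and uses made-up names
(hot, offload, my_offload_init) instead of the kernel types.

#include <stdio.h>

struct callbacks {
	int (*gro_receive)(void);
	int (*gro_complete)(void);
};

struct offload {
	struct callbacks callbacks;
};

/* Stand-in for net_hotdata: the ops now live inside one shared object. */
static struct {
	struct offload my_offload;
} hot;

static int rx(void)   { return 0; }
static int done(void) { return 1; }

static void my_offload_init(void)
{
	/* Compound literal assignment, same shape as in the patch. */
	hot.my_offload = (struct offload) {
		.callbacks = {
			.gro_receive  = rx,
			.gro_complete = done,
		},
	};
}

int main(void)
{
	my_offload_init();
	printf("%d\n", hot.my_offload.callbacks.gro_complete());
	return 0;
}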
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/hotdata.h | 2 ++
net/ipv4/udp_offload.c | 17 ++++++++---------
net/ipv6/udp_offload.c | 21 ++++++++++-----------
3 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 728aee1cf07c8f0d85873d912248a99e148f84b1..086114e4d3bcc9184304b024ed93c9024888fcf1 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -11,10 +11,12 @@ struct net_hotdata {
#if IS_ENABLED(CONFIG_INET)
struct packet_offload ip_packet_offload;
struct net_offload tcpv4_offload;
+ struct net_offload udpv4_offload;
#endif
#if IS_ENABLED(CONFIG_IPV6)
struct packet_offload ipv6_packet_offload;
struct net_offload tcpv6_offload;
+ struct net_offload udpv6_offload;
#endif
struct list_head offload_base;
struct list_head ptype_all;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 6c95d28d0c4a7e56d587a986113b3711f8de964c..b9880743765c6c24c28bea095f16f0cf091664ce 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -737,15 +737,14 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
}
-static const struct net_offload udpv4_offload = {
- .callbacks = {
- .gso_segment = udp4_ufo_fragment,
- .gro_receive = udp4_gro_receive,
- .gro_complete = udp4_gro_complete,
- },
-};
-
int __init udpv4_offload_init(void)
{
- return inet_add_offload(&udpv4_offload, IPPROTO_UDP);
+ net_hotdata.udpv4_offload = (struct net_offload) {
+ .callbacks = {
+ .gso_segment = udp4_ufo_fragment,
+ .gro_receive = udp4_gro_receive,
+ .gro_complete = udp4_gro_complete,
+ },
+ };
+ return inet_add_offload(&net_hotdata.udpv4_offload, IPPROTO_UDP);
}
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 6b95ba241ebe2af7e5f2760d8a9c1d78f08579c5..312bcaeea96fb78ac488124cf7795aa834392c64 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -192,20 +192,19 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
}
-static const struct net_offload udpv6_offload = {
- .callbacks = {
- .gso_segment = udp6_ufo_fragment,
- .gro_receive = udp6_gro_receive,
- .gro_complete = udp6_gro_complete,
- },
-};
-
-int udpv6_offload_init(void)
+int __init udpv6_offload_init(void)
{
- return inet6_add_offload(&udpv6_offload, IPPROTO_UDP);
+ net_hotdata.udpv6_offload = (struct net_offload) {
+ .callbacks = {
+ .gso_segment = udp6_ufo_fragment,
+ .gro_receive = udp6_gro_receive,
+ .gro_complete = udp6_gro_complete,
+ },
+ };
+ return inet6_add_offload(&net_hotdata.udpv6_offload, IPPROTO_UDP);
}
int udpv6_offload_exit(void)
{
- return inet6_del_offload(&udpv6_offload, IPPROTO_UDP);
+ return inet6_del_offload(&net_hotdata.udpv6_offload, IPPROTO_UDP);
}
--
2.44.0.278.ge034bb2e1d-goog
* [PATCH net-next 12/18] ipv6: move tcpv6_protocol and udpv6_protocol to net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (10 preceding siblings ...)
2024-03-05 16:04 ` [PATCH net-next 11/18] udp: move udpv4_offload and udpv6_offload " Eric Dumazet
@ 2024-03-05 16:04 ` Eric Dumazet
2024-03-05 16:04 ` [PATCH net-next 13/18] inet: move tcp_protocol and udp_protocol " Eric Dumazet
` (7 subsequent siblings)
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
These structures are read in the rx path; move them to net_hotdata
for better cache locality.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/hotdata.h | 2 ++
net/ipv6/tcp_ipv6.c | 17 +++++++++--------
net/ipv6/udp.c | 16 ++++++++--------
3 files changed, 19 insertions(+), 16 deletions(-)
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 086114e4d3bcc9184304b024ed93c9024888fcf1..97617acb75e1f2141fe7170d93c06f9813c725a3 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -16,7 +16,9 @@ struct net_hotdata {
#if IS_ENABLED(CONFIG_IPV6)
struct packet_offload ipv6_packet_offload;
struct net_offload tcpv6_offload;
+ struct inet6_protocol tcpv6_protocol;
struct net_offload udpv6_offload;
+ struct inet6_protocol udpv6_protocol;
#endif
struct list_head offload_base;
struct list_head ptype_all;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f677f0fa51968d00c3571d55ae7850742387f2d1..3f4cba49e9ee6520987993dcea082e6065b4688b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -58,6 +58,7 @@
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
+#include <net/hotdata.h>
#include <net/busy_poll.h>
#include <linux/proc_fs.h>
@@ -2367,11 +2368,6 @@ struct proto tcpv6_prot = {
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
-static const struct inet6_protocol tcpv6_protocol = {
- .handler = tcp_v6_rcv,
- .err_handler = tcp_v6_err,
- .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
-};
static struct inet_protosw tcpv6_protosw = {
.type = SOCK_STREAM,
@@ -2408,7 +2404,12 @@ int __init tcpv6_init(void)
{
int ret;
- ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
+ net_hotdata.tcpv6_protocol = (struct inet6_protocol) {
+ .handler = tcp_v6_rcv,
+ .err_handler = tcp_v6_err,
+ .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
+ };
+ ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
if (ret)
goto out;
@@ -2433,7 +2434,7 @@ int __init tcpv6_init(void)
out_tcpv6_protosw:
inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
- inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
+ inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
goto out;
}
@@ -2441,5 +2442,5 @@ void tcpv6_exit(void)
{
unregister_pernet_subsys(&tcpv6_net_ops);
inet6_unregister_protosw(&tcpv6_protosw);
- inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
+ inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3f2249b4cd5f6a594dd9768e29f20f0d9a57faed..97d86909aabb6588d0bba901f6df1f23a4f2e561 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1702,11 +1702,6 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
-static const struct inet6_protocol udpv6_protocol = {
- .handler = udpv6_rcv,
- .err_handler = udpv6_err,
- .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
-};
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
@@ -1803,7 +1798,12 @@ int __init udpv6_init(void)
{
int ret;
- ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP);
+ net_hotdata.udpv6_protocol = (struct inet6_protocol) {
+ .handler = udpv6_rcv,
+ .err_handler = udpv6_err,
+ .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
+ };
+ ret = inet6_add_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
if (ret)
goto out;
@@ -1814,12 +1814,12 @@ int __init udpv6_init(void)
return ret;
out_udpv6_protocol:
- inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
+ inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
goto out;
}
void udpv6_exit(void)
{
inet6_unregister_protosw(&udpv6_protosw);
- inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
+ inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
}
--
2.44.0.278.ge034bb2e1d-goog
* [PATCH net-next 13/18] inet: move tcp_protocol and udp_protocol to net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (11 preceding siblings ...)
2024-03-05 16:04 ` [PATCH net-next 12/18] ipv6: move tcpv6_protocol and udpv6_protocol " Eric Dumazet
@ 2024-03-05 16:04 ` Eric Dumazet
2024-03-05 16:04 ` [PATCH net-next 14/18] inet: move inet_ehash_secret and udp_ehash_secret into net_hotdata Eric Dumazet
` (6 subsequent siblings)
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
These structures are read in the rx path; move them to net_hotdata
for better cache locality.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/hotdata.h | 2 ++
net/ipv4/af_inet.c | 30 +++++++++++++++---------------
2 files changed, 17 insertions(+), 15 deletions(-)
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 97617acb75e1f2141fe7170d93c06f9813c725a3..4d1cb3c29d4edfbc18cf56c370a1e04e5fcb1cbd 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -11,7 +11,9 @@ struct net_hotdata {
#if IS_ENABLED(CONFIG_INET)
struct packet_offload ip_packet_offload;
struct net_offload tcpv4_offload;
+ struct net_protocol tcp_protocol;
struct net_offload udpv4_offload;
+ struct net_protocol udp_protocol;
#endif
#if IS_ENABLED(CONFIG_IPV6)
struct packet_offload ipv6_packet_offload;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 08dda6955562ea6b89e02b8299b03ab52b342f27..6f1cfd176e7b84f23d8a5e505bf8e13b2b755f06 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1751,19 +1751,6 @@ static const struct net_protocol igmp_protocol = {
};
#endif
-static const struct net_protocol tcp_protocol = {
- .handler = tcp_v4_rcv,
- .err_handler = tcp_v4_err,
- .no_policy = 1,
- .icmp_strict_tag_validation = 1,
-};
-
-static const struct net_protocol udp_protocol = {
- .handler = udp_rcv,
- .err_handler = udp_err,
- .no_policy = 1,
-};
-
static const struct net_protocol icmp_protocol = {
.handler = icmp_rcv,
.err_handler = icmp_err,
@@ -1992,9 +1979,22 @@ static int __init inet_init(void)
if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
pr_crit("%s: Cannot add ICMP protocol\n", __func__);
- if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
+
+ net_hotdata.udp_protocol = (struct net_protocol) {
+ .handler = udp_rcv,
+ .err_handler = udp_err,
+ .no_policy = 1,
+ };
+ if (inet_add_protocol(&net_hotdata.udp_protocol, IPPROTO_UDP) < 0)
pr_crit("%s: Cannot add UDP protocol\n", __func__);
- if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
+
+ net_hotdata.tcp_protocol = (struct net_protocol) {
+ .handler = tcp_v4_rcv,
+ .err_handler = tcp_v4_err,
+ .no_policy = 1,
+ .icmp_strict_tag_validation = 1,
+ };
+ if (inet_add_protocol(&net_hotdata.tcp_protocol, IPPROTO_TCP) < 0)
pr_crit("%s: Cannot add TCP protocol\n", __func__);
#ifdef CONFIG_IP_MULTICAST
if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
--
2.44.0.278.ge034bb2e1d-goog
* [PATCH net-next 14/18] inet: move inet_ehash_secret and udp_ehash_secret into net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (12 preceding siblings ...)
2024-03-05 16:04 ` [PATCH net-next 13/18] inet: move tcp_protocol and udp_protocol " Eric Dumazet
@ 2024-03-05 16:04 ` Eric Dumazet
2024-03-05 16:04 ` [PATCH net-next 15/18] ipv6: move inet6_ehash_secret and udp6_ehash_secret " Eric Dumazet
` (5 subsequent siblings)
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
"struct net_protocol" has a 32bit hole in 32bit arches.
Use it to store the 32bit secret used by UDP and TCP,
to increase cache locality in rx path.
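A hedged userspace sketch of why the extra u32 can be free: when
pointer alignment already forces padding in the ops structure, a 32-bit
member slots into that padding without growing it. The structs below
are illustrative stand-ins, not the kernel's struct net_protocol, and
whether the padding exists depends on the target ABI; the two sizeof
prints make that visible on whatever arch you build for.

#include <stdio.h>
#include <stdint.h>

struct proto_without_secret {
	int (*handler)(void);
	int (*err_handler)(void);
	unsigned int no_policy:1;
};

struct proto_with_secret {
	int (*handler)(void);
	int (*err_handler)(void);
	unsigned int no_policy:1;
	uint32_t secret;
};

int main(void)
{
	printf("without secret: %zu bytes\n", sizeof(struct proto_without_secret));
	printf("with secret:    %zu bytes\n", sizeof(struct proto_with_secret));
	return 0;
}

On an LP64 build both prints are typically 24 bytes, i.e. the secret
reuses tail padding; running pahole -C net_protocol on a built vmlinux
shows the same thing for the real structure.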
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/hotdata.h | 3 +++
include/net/protocol.h | 1 +
net/ipv4/inet_hashtables.c | 3 +--
net/ipv4/udp.c | 2 --
4 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 4d1cb3c29d4edfbc18cf56c370a1e04e5fcb1cbd..4dd86be99116ff83f2524461a006565b2ade2241 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -36,6 +36,9 @@ struct net_hotdata {
int dev_rx_weight;
};
+#define inet_ehash_secret net_hotdata.tcp_protocol.secret
+#define udp_ehash_secret net_hotdata.udp_protocol.secret
+
extern struct net_hotdata net_hotdata;
#endif /* _NET_HOTDATA_H */
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 6aef8cb11cc8c409e5f7a2519f5e747be584c8d5..3ff26e66735cec98b08eadb1c3f129e011923cb0 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -46,6 +46,7 @@ struct net_protocol {
* socket lookup?
*/
icmp_strict_tag_validation:1;
+ u32 secret;
};
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 308ff34002ea6b5e0620004f65ffd833087afbc1..7498af3201647fd937bf8177f04c200bea178a79 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -24,6 +24,7 @@
#include <net/inet6_hashtables.h>
#endif
#include <net/secure_seq.h>
+#include <net/hotdata.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <net/sock_reuseport.h>
@@ -32,8 +33,6 @@ u32 inet_ehashfn(const struct net *net, const __be32 laddr,
const __u16 lport, const __be32 faddr,
const __be16 fport)
{
- static u32 inet_ehash_secret __read_mostly;
-
net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));
return __inet_ehashfn(laddr, lport, faddr, fport,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index a8acea17b4e5344d022ae8f8eb674d1a36f8035a..2beabf5b2d8628f1fed69a0212c57bd3cd638483 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -411,8 +411,6 @@ INDIRECT_CALLABLE_SCOPE
u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
const __be32 faddr, const __be16 fport)
{
- static u32 udp_ehash_secret __read_mostly;
-
net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret));
return __inet_ehashfn(laddr, lport, faddr, fport,
--
2.44.0.278.ge034bb2e1d-goog
* [PATCH net-next 15/18] ipv6: move inet6_ehash_secret and udp6_ehash_secret into net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (13 preceding siblings ...)
2024-03-05 16:04 ` [PATCH net-next 14/18] inet: move inet_ehash_secret and udp_ehash_secret into net_hotdata Eric Dumazet
@ 2024-03-05 16:04 ` Eric Dumazet
2024-03-05 16:04 ` [PATCH net-next 16/18] ipv6: move tcp_ipv6_hash_secret and udp_ipv6_hash_secret to net_hotdata Eric Dumazet
` (4 subsequent siblings)
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
"struct inet6_protocol" has a 32bit hole in 32bit arches.
Use it to store the 32bit secret used by UDP and TCP,
to increase cache locality in rx path.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/hotdata.h | 2 ++
include/net/protocol.h | 1 +
net/ipv6/inet6_hashtables.c | 2 +-
net/ipv6/udp.c | 1 -
4 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 4dd86be99116ff83f2524461a006565b2ade2241..e4dac2f859efd421b975a61360536af949046d0e 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -38,6 +38,8 @@ struct net_hotdata {
#define inet_ehash_secret net_hotdata.tcp_protocol.secret
#define udp_ehash_secret net_hotdata.udp_protocol.secret
+#define inet6_ehash_secret net_hotdata.tcpv6_protocol.secret
+#define udp6_ehash_secret net_hotdata.udpv6_protocol.secret
extern struct net_hotdata net_hotdata;
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 3ff26e66735cec98b08eadb1c3f129e011923cb0..213649d2ab098edc4eb9f44a5403540887c2b8ef 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -60,6 +60,7 @@ struct inet6_protocol {
__be32 info);
unsigned int flags; /* INET6_PROTO_xxx */
+ u32 secret;
};
#define INET6_PROTO_NOPOLICY 0x1
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b0e8d278e8a9b794d0001efdc0f43716f9a34f8f..0fee97f3166cf8326b3b714ac6bde48ca5188cec 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -14,6 +14,7 @@
#include <linux/random.h>
#include <net/addrconf.h>
+#include <net/hotdata.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
@@ -25,7 +26,6 @@ u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
const struct in6_addr *faddr, const __be16 fport)
{
- static u32 inet6_ehash_secret __read_mostly;
static u32 ipv6_hash_secret __read_mostly;
u32 lhash, fhash;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 97d86909aabb6588d0bba901f6df1f23a4f2e561..1e1c67a51675e8534a953a6d4d63211388d95ca9 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -79,7 +79,6 @@ u32 udp6_ehashfn(const struct net *net,
const struct in6_addr *faddr,
const __be16 fport)
{
- static u32 udp6_ehash_secret __read_mostly;
static u32 udp_ipv6_hash_secret __read_mostly;
u32 lhash, fhash;
--
2.44.0.278.ge034bb2e1d-goog
* [PATCH net-next 16/18] ipv6: move tcp_ipv6_hash_secret and udp_ipv6_hash_secret to net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (14 preceding siblings ...)
2024-03-05 16:04 ` [PATCH net-next 15/18] ipv6: move inet6_ehash_secret and udp6_ehash_secret " Eric Dumazet
@ 2024-03-05 16:04 ` Eric Dumazet
2024-03-05 16:04 ` [PATCH net-next 17/18] net: introduce include/net/rps.h Eric Dumazet
` (3 subsequent siblings)
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
Use a 32bit hole in "struct net_offload" to store
the remaining 32bit secrets used by TCPv6 and UDPv6.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/hotdata.h | 2 ++
include/net/protocol.h | 1 +
net/ipv6/inet6_hashtables.c | 6 ++----
net/ipv6/udp.c | 2 --
4 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index e4dac2f859efd421b975a61360536af949046d0e..0a0a9106b40030f56b04c1e48083c13498ce0939 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -39,7 +39,9 @@ struct net_hotdata {
#define inet_ehash_secret net_hotdata.tcp_protocol.secret
#define udp_ehash_secret net_hotdata.udp_protocol.secret
#define inet6_ehash_secret net_hotdata.tcpv6_protocol.secret
+#define tcp_ipv6_hash_secret net_hotdata.tcpv6_offload.secret
#define udp6_ehash_secret net_hotdata.udpv6_protocol.secret
+#define udp_ipv6_hash_secret net_hotdata.udpv6_offload.secret
extern struct net_hotdata net_hotdata;
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 213649d2ab098edc4eb9f44a5403540887c2b8ef..b2499f88f8f8199de28555d0cbd6b4523325205b 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -70,6 +70,7 @@ struct inet6_protocol {
struct net_offload {
struct offload_callbacks callbacks;
unsigned int flags; /* Flags used by IPv6 for now */
+ u32 secret;
};
/* This should be set for any extension header which is compatible with GSO. */
#define INET6_PROTO_GSO_EXTHDR 0x1
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 0fee97f3166cf8326b3b714ac6bde48ca5188cec..2e81383b663b71b95719a295fd9629f1193e4225 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -26,15 +26,13 @@ u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
const struct in6_addr *faddr, const __be16 fport)
{
- static u32 ipv6_hash_secret __read_mostly;
-
u32 lhash, fhash;
net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret));
- net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
+ net_get_random_once(&tcp_ipv6_hash_secret, sizeof(tcp_ipv6_hash_secret));
lhash = (__force u32)laddr->s6_addr32[3];
- fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
+ fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret);
return __inet6_ehashfn(lhash, lport, fhash, fport,
inet6_ehash_secret + net_hash_mix(net));
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1e1c67a51675e8534a953a6d4d63211388d95ca9..80ad8f436b179d7279cbbd5108c3494a9773c0d8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -79,8 +79,6 @@ u32 udp6_ehashfn(const struct net *net,
const struct in6_addr *faddr,
const __be16 fport)
{
- static u32 udp_ipv6_hash_secret __read_mostly;
-
u32 lhash, fhash;
net_get_random_once(&udp6_ehash_secret,
--
2.44.0.278.ge034bb2e1d-goog
* [PATCH net-next 17/18] net: introduce include/net/rps.h
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (15 preceding siblings ...)
2024-03-05 16:04 ` [PATCH net-next 16/18] ipv6: move tcp_ipv6_hash_secret and udp_ipv6_hash_secret to net_hotdata Eric Dumazet
@ 2024-03-05 16:04 ` Eric Dumazet
2024-03-05 16:04 ` [PATCH net-next 18/18] net: move rps_sock_flow_table to net_hotdata Eric Dumazet
` (2 subsequent siblings)
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
Move RPS-related structures and helpers from include/linux/netdevice.h
and include/net/sock.h to a new include file.
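The comment block that moves into include/net/rps.h describes how each
32-bit rps_sock_flow_table entry packs the current CPU number below
rps_cpu_mask and the high bits of the flow hash above it. A small
userspace sketch of that encoding; nr_cpu_ids, the hash and the CPU
number are assumed values picked for illustration.

#include <stdio.h>
#include <stdint.h>

static uint32_t roundup_pow_of_two(uint32_t x)
{
	uint32_t r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	uint32_t nr_cpu_ids = 64;			/* assumed */
	uint32_t rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1;
	uint32_t hash = 0xdeadbeef;			/* assumed flow hash */
	uint32_t cpu = 17;				/* assumed current CPU */
	uint32_t ent = (hash & ~rps_cpu_mask) | cpu;

	/* 0x3f for 64 CPUs: 6 bits of CPU, 26 bits of hash. */
	printf("rps_cpu_mask = 0x%x\n", rps_cpu_mask);
	printf("entry = 0x%08x (cpu %u, hash bits 0x%08x)\n",
	       ent, ent & rps_cpu_mask, ent & ~rps_cpu_mask);
	return 0;
}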
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
drivers/net/ethernet/intel/ice/ice_arfs.c | 1 +
.../net/ethernet/mellanox/mlx4/en_netdev.c | 1 +
.../net/ethernet/mellanox/mlx5/core/en_arfs.c | 1 +
drivers/net/ethernet/sfc/rx_common.c | 1 +
drivers/net/ethernet/sfc/siena/rx_common.c | 1 +
drivers/net/tun.c | 1 +
include/linux/netdevice.h | 82 -----------
include/net/rps.h | 127 ++++++++++++++++++
include/net/sock.h | 35 -----
net/core/dev.c | 1 +
net/core/net-sysfs.c | 1 +
net/core/sysctl_net_core.c | 1 +
net/ipv4/af_inet.c | 1 +
net/ipv4/tcp.c | 1 +
net/ipv6/af_inet6.c | 1 +
net/sctp/socket.c | 1 +
16 files changed, 140 insertions(+), 117 deletions(-)
create mode 100644 include/net/rps.h
diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c
index cca0e753f38ff92be24f8e8fc8963e2bf2416cfa..7cee365cc7d167865511c8b0dab08520814d4ec1 100644
--- a/drivers/net/ethernet/intel/ice/ice_arfs.c
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
@@ -2,6 +2,7 @@
/* Copyright (C) 2018-2020, Intel Corporation. */
#include "ice.h"
+#include <net/rps.h>
/**
* ice_is_arfs_active - helper to check is aRFS is active
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index d7da62cda821f70af530f95d23b63afd64423219..5d3fde63b273922f52f2466c7b339f3c9908e705 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -42,6 +42,7 @@
#include <net/ip.h>
#include <net/vxlan.h>
#include <net/devlink.h>
+#include <net/rps.h>
#include <linux/mlx4/driver.h>
#include <linux/mlx4/device.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index e66f486faafe1a6b0cfc75f0f11b2e957b040842..c7f542d0b8f08c635a6fad868a364a8f5f91ba8c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -34,6 +34,7 @@
#include <linux/mlx5/fs.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <net/rps.h>
#include "en.h"
#define ARFS_HASH_SHIFT BITS_PER_BYTE
diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c
index fac227d372db4c832a52a62144ff9c6c89995335..dcd901eccfc8f1f9c68ee391de548a2c1602ab94 100644
--- a/drivers/net/ethernet/sfc/rx_common.c
+++ b/drivers/net/ethernet/sfc/rx_common.c
@@ -11,6 +11,7 @@
#include "net_driver.h"
#include <linux/module.h>
#include <linux/iommu.h>
+#include <net/rps.h>
#include "efx.h"
#include "nic.h"
#include "rx_common.h"
diff --git a/drivers/net/ethernet/sfc/siena/rx_common.c b/drivers/net/ethernet/sfc/siena/rx_common.c
index 4579f43484c3675963de1ad8c7753a200c680fe1..219fb358a646d399dbd3c66cc34039f70bbd7341 100644
--- a/drivers/net/ethernet/sfc/siena/rx_common.c
+++ b/drivers/net/ethernet/sfc/siena/rx_common.c
@@ -11,6 +11,7 @@
#include "net_driver.h"
#include <linux/module.h>
#include <linux/iommu.h>
+#include <net/rps.h>
#include "efx.h"
#include "nic.h"
#include "rx_common.h"
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index be37235af55d2d282b162adcd9d0e599cd5e6b3e..c1be5cec0f350d61c604d5d2776e047aa48c90b8 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -78,6 +78,7 @@
#include <net/ax25.h>
#include <net/rose.h>
#include <net/6lowpan.h>
+#include <net/rps.h>
#include <linux/uaccess.h>
#include <linux/proc_fs.h>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d3d4d1052ecb0bc704e9bb40fffbea85cfb6ab03..aaacf86df56ab480485b34b19fecebfd00c34c59 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -227,12 +227,6 @@ struct net_device_core_stats {
#include <linux/cache.h>
#include <linux/skbuff.h>
-#ifdef CONFIG_RPS
-#include <linux/static_key.h>
-extern struct static_key_false rps_needed;
-extern struct static_key_false rfs_needed;
-#endif
-
struct neighbour;
struct neigh_parms;
struct sk_buff;
@@ -732,86 +726,10 @@ static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node
#endif
}
-#ifdef CONFIG_RPS
-/*
- * This structure holds an RPS map which can be of variable length. The
- * map is an array of CPUs.
- */
-struct rps_map {
- unsigned int len;
- struct rcu_head rcu;
- u16 cpus[];
-};
-#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
-
-/*
- * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
- * tail pointer for that CPU's input queue at the time of last enqueue, and
- * a hardware filter index.
- */
-struct rps_dev_flow {
- u16 cpu;
- u16 filter;
- unsigned int last_qtail;
-};
-#define RPS_NO_FILTER 0xffff
-
-/*
- * The rps_dev_flow_table structure contains a table of flow mappings.
- */
-struct rps_dev_flow_table {
- unsigned int mask;
- struct rcu_head rcu;
- struct rps_dev_flow flows[];
-};
-#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
- ((_num) * sizeof(struct rps_dev_flow)))
-
-/*
- * The rps_sock_flow_table contains mappings of flows to the last CPU
- * on which they were processed by the application (set in recvmsg).
- * Each entry is a 32bit value. Upper part is the high-order bits
- * of flow hash, lower part is CPU number.
- * rps_cpu_mask is used to partition the space, depending on number of
- * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
- * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
- * meaning we use 32-6=26 bits for the hash.
- */
-struct rps_sock_flow_table {
- u32 mask;
-
- u32 ents[] ____cacheline_aligned_in_smp;
-};
-#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
-
-#define RPS_NO_CPU 0xffff
-
-extern u32 rps_cpu_mask;
-extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
-
-static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
- u32 hash)
-{
- if (table && hash) {
- unsigned int index = hash & table->mask;
- u32 val = hash & ~rps_cpu_mask;
-
- /* We only give a hint, preemption can change CPU under us */
- val |= raw_smp_processor_id();
-
- /* The following WRITE_ONCE() is paired with the READ_ONCE()
- * here, and another one in get_rps_cpu().
- */
- if (READ_ONCE(table->ents[index]) != val)
- WRITE_ONCE(table->ents[index], val);
- }
-}
-
#ifdef CONFIG_RFS_ACCEL
bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
u16 filter_id);
#endif
-#endif /* CONFIG_RPS */
/* XPS map type and offset of the xps map within net_device->xps_maps[]. */
enum xps_map_type {
diff --git a/include/net/rps.h b/include/net/rps.h
new file mode 100644
index 0000000000000000000000000000000000000000..6081d817d2458b7b34036d87fbdef3fa6dc914ea
--- /dev/null
+++ b/include/net/rps.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_RPS_H
+#define _NET_RPS_H
+
+#include <linux/types.h>
+#include <linux/static_key.h>
+#include <net/sock.h>
+
+#ifdef CONFIG_RPS
+
+extern struct static_key_false rps_needed;
+extern struct static_key_false rfs_needed;
+
+/*
+ * This structure holds an RPS map which can be of variable length. The
+ * map is an array of CPUs.
+ */
+struct rps_map {
+ unsigned int len;
+ struct rcu_head rcu;
+ u16 cpus[];
+};
+#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
+
+/*
+ * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
+ * tail pointer for that CPU's input queue at the time of last enqueue, and
+ * a hardware filter index.
+ */
+struct rps_dev_flow {
+ u16 cpu;
+ u16 filter;
+ unsigned int last_qtail;
+};
+#define RPS_NO_FILTER 0xffff
+
+/*
+ * The rps_dev_flow_table structure contains a table of flow mappings.
+ */
+struct rps_dev_flow_table {
+ unsigned int mask;
+ struct rcu_head rcu;
+ struct rps_dev_flow flows[];
+};
+#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
+ ((_num) * sizeof(struct rps_dev_flow)))
+
+/*
+ * The rps_sock_flow_table contains mappings of flows to the last CPU
+ * on which they were processed by the application (set in recvmsg).
+ * Each entry is a 32bit value. Upper part is the high-order bits
+ * of flow hash, lower part is CPU number.
+ * rps_cpu_mask is used to partition the space, depending on number of
+ * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
+ * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
+ * meaning we use 32-6=26 bits for the hash.
+ */
+struct rps_sock_flow_table {
+ u32 mask;
+
+ u32 ents[] ____cacheline_aligned_in_smp;
+};
+#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
+
+#define RPS_NO_CPU 0xffff
+
+extern u32 rps_cpu_mask;
+extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
+
+static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
+ u32 hash)
+{
+ unsigned int index = hash & table->mask;
+ u32 val = hash & ~rps_cpu_mask;
+
+ /* We only give a hint, preemption can change CPU under us */
+ val |= raw_smp_processor_id();
+
+ /* The following WRITE_ONCE() is paired with the READ_ONCE()
+ * here, and another one in get_rps_cpu().
+ */
+ if (READ_ONCE(table->ents[index]) != val)
+ WRITE_ONCE(table->ents[index], val);
+}
+
+#endif /* CONFIG_RPS */
+
+static inline void sock_rps_record_flow_hash(__u32 hash)
+{
+#ifdef CONFIG_RPS
+ struct rps_sock_flow_table *sock_flow_table;
+
+ if (!hash)
+ return;
+ rcu_read_lock();
+ sock_flow_table = rcu_dereference(rps_sock_flow_table);
+ if (sock_flow_table)
+ rps_record_sock_flow(sock_flow_table, hash);
+ rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+ if (static_branch_unlikely(&rfs_needed)) {
+ /* Reading sk->sk_rxhash might incur an expensive cache line
+ * miss.
+ *
+ * TCP_ESTABLISHED does cover almost all states where RFS
+ * might be useful, and is cheaper [1] than testing :
+ * IPv4: inet_sk(sk)->inet_daddr
+ * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
+ * OR an additional socket flag
+ * [1] : sk_state and sk_prot are in the same cache line.
+ */
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* This READ_ONCE() is paired with the WRITE_ONCE()
+ * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
+ */
+ sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
+ }
+ }
+#endif
+}
+
+#endif /* _NET_RPS_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 09a0cde8bf52286167f3b9e9b32f632f3d1b5487..b5e00702acc1f037df7eb8ad085d00e0b18079a8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1117,41 +1117,6 @@ static inline void sk_incoming_cpu_update(struct sock *sk)
WRITE_ONCE(sk->sk_incoming_cpu, cpu);
}
-static inline void sock_rps_record_flow_hash(__u32 hash)
-{
-#ifdef CONFIG_RPS
- struct rps_sock_flow_table *sock_flow_table;
-
- rcu_read_lock();
- sock_flow_table = rcu_dereference(rps_sock_flow_table);
- rps_record_sock_flow(sock_flow_table, hash);
- rcu_read_unlock();
-#endif
-}
-
-static inline void sock_rps_record_flow(const struct sock *sk)
-{
-#ifdef CONFIG_RPS
- if (static_branch_unlikely(&rfs_needed)) {
- /* Reading sk->sk_rxhash might incur an expensive cache line
- * miss.
- *
- * TCP_ESTABLISHED does cover almost all states where RFS
- * might be useful, and is cheaper [1] than testing :
- * IPv4: inet_sk(sk)->inet_daddr
- * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
- * OR an additional socket flag
- * [1] : sk_state and sk_prot are in the same cache line.
- */
- if (sk->sk_state == TCP_ESTABLISHED) {
- /* This READ_ONCE() is paired with the WRITE_ONCE()
- * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
- */
- sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
- }
- }
-#endif
-}
static inline void sock_rps_save_rxhash(struct sock *sk,
const struct sk_buff *skb)
diff --git a/net/core/dev.c b/net/core/dev.c
index 26676dbd6c8594371a4d30e0dd95d72342e226ca..e9f24a31ae121f713e6ef5a530a65218bbb457e8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -155,6 +155,7 @@
#include <net/netdev_rx_queue.h>
#include <net/page_pool/types.h>
#include <net/page_pool/helpers.h>
+#include <net/rps.h>
#include "dev.h"
#include "net-sysfs.h"
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index af238026ac3c6d81267bf0b53a7b240ee9ba32b1..5560083774b1ad4e2612637990dfa3f80f750c83 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -24,6 +24,7 @@
#include <linux/of_net.h>
#include <linux/cpu.h>
#include <net/netdev_rx_queue.h>
+#include <net/rps.h>
#include "dev.h"
#include "net-sysfs.h"
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 8a4c698dad9c97636ca9cebfad925d4220e98f2a..4b93e27404e83a5b3afaa23ebd18cf55b1fdc6e8 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -24,6 +24,7 @@
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
#include <net/hotdata.h>
+#include <net/rps.h>
#include "dev.h"
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6f1cfd176e7b84f23d8a5e505bf8e13b2b755f06..55bd72997b31063b7baad350fdcff40e938aecb8 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -119,6 +119,7 @@
#endif
#include <net/l3mdev.h>
#include <net/compat.h>
+#include <net/rps.h>
#include <trace/events/sock.h>
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7e1b848398d04f2da2a91c3af97b1e2e3895b8ee..c5b83875411aee6037d0afd060eb9d3e16f8b6b0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -279,6 +279,7 @@
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <net/busy_poll.h>
+#include <net/rps.h>
/* Track pending CMSGs. */
enum {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b90d46533cdcc1ffb61ca483e6f67ab358ede55c..8041dc181bd42e5e1af2d9e9fe6af50057e5b58f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -64,6 +64,7 @@
#include <net/xfrm.h>
#include <net/ioam6.h>
#include <net/rawv6.h>
+#include <net/rps.h>
#include <linux/uaccess.h>
#include <linux/mroute6.h>
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6b9fcdb0952a0fe599ae5d1d1cc6fa9557a3a3bc..c67679a41044fc8e801d175b235249f2c8b99dc0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -67,6 +67,7 @@
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
#include <net/sctp/stream_sched.h>
+#include <net/rps.h>
/* Forward declarations for internal helper functions. */
static bool sctp_writeable(const struct sock *sk);
--
2.44.0.278.ge034bb2e1d-goog
* [PATCH net-next 18/18] net: move rps_sock_flow_table to net_hotdata
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (16 preceding siblings ...)
2024-03-05 16:04 ` [PATCH net-next 17/18] net: introduce include/net/rps.h Eric Dumazet
@ 2024-03-05 16:04 ` Eric Dumazet
2024-03-05 18:28 ` [PATCH net-next 00/18] net: group together hot data Soheil Hassas Yeganeh
2024-03-06 4:39 ` David Ahern
19 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2024-03-05 16:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, David Ahern, Willem de Bruijn, Soheil Hassas Yeganeh,
Neal Cardwell, eric.dumazet, Eric Dumazet
rps_sock_flow_table and rps_cpu_mask are used in the fast path.
Move them to net_hotdata for better cache locality.
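A minimal userspace sketch of the resulting read path: the table
pointer and rps_cpu_mask now sit next to each other in one shared
object, so the check done in get_rps_cpu() touches a single structure
plus the table entry. rcu_dereference() is approximated by a C11 atomic
load, and all names and values below are illustrative, not the kernel's.

#include <stdio.h>
#include <stdint.h>
#include <stdatomic.h>

struct sock_flow_table {
	uint32_t mask;
	uint32_t ents[64];
};

/* Stand-in for net_hotdata: pointer and mask share one object. */
static struct {
	_Atomic(struct sock_flow_table *) rps_sock_flow_table;
	uint32_t rps_cpu_mask;
} hot = { .rps_cpu_mask = 0x3f };

static struct sock_flow_table table = { .mask = 63 };

int main(void)
{
	uint32_t hash = 0xdeadbeef;	/* assumed flow hash */
	struct sock_flow_table *t;

	atomic_store(&hot.rps_sock_flow_table, &table);

	t = atomic_load(&hot.rps_sock_flow_table);
	if (t) {
		uint32_t ident = t->ents[hash & t->mask];

		/* Same test as get_rps_cpu(): are the hash bits unchanged? */
		printf("hash bits match: %d\n",
		       ((ident ^ hash) & ~hot.rps_cpu_mask) == 0);
	}
	return 0;
}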
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/hotdata.h | 4 ++++
include/net/rps.h | 8 +++-----
net/core/dev.c | 12 +++---------
net/core/sysctl_net_core.c | 9 ++++++---
4 files changed, 16 insertions(+), 17 deletions(-)
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 0a0a9106b40030f56b04c1e48083c13498ce0939..7bb6e46aec8f19deff42112041feb47724cdd538 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -27,6 +27,10 @@ struct net_hotdata {
struct kmem_cache *skbuff_cache;
struct kmem_cache *skbuff_fclone_cache;
struct kmem_cache *skb_small_head_cache;
+#ifdef CONFIG_RPS
+ struct rps_sock_flow_table __rcu *rps_sock_flow_table;
+ u32 rps_cpu_mask;
+#endif
int gro_normal_batch;
int netdev_budget;
int netdev_budget_usecs;
diff --git a/include/net/rps.h b/include/net/rps.h
index 6081d817d2458b7b34036d87fbdef3fa6dc914ea..7660243e905b92651a41292e04caf72c5f12f26e 100644
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/static_key.h>
#include <net/sock.h>
+#include <net/hotdata.h>
#ifdef CONFIG_RPS
@@ -64,14 +65,11 @@ struct rps_sock_flow_table {
#define RPS_NO_CPU 0xffff
-extern u32 rps_cpu_mask;
-extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
-
static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
u32 hash)
{
unsigned int index = hash & table->mask;
- u32 val = hash & ~rps_cpu_mask;
+ u32 val = hash & ~net_hotdata.rps_cpu_mask;
/* We only give a hint, preemption can change CPU under us */
val |= raw_smp_processor_id();
@@ -93,7 +91,7 @@ static inline void sock_rps_record_flow_hash(__u32 hash)
if (!hash)
return;
rcu_read_lock();
- sock_flow_table = rcu_dereference(rps_sock_flow_table);
+ sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table);
if (sock_flow_table)
rps_record_sock_flow(sock_flow_table, hash);
rcu_read_unlock();
diff --git a/net/core/dev.c b/net/core/dev.c
index e9f24a31ae121f713e6ef5a530a65218bbb457e8..9f54d8e61ec0f4a4ad1824e333f89ad08c5ec431 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4450,12 +4450,6 @@ static inline void ____napi_schedule(struct softnet_data *sd,
#ifdef CONFIG_RPS
-/* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
-EXPORT_SYMBOL(rps_sock_flow_table);
-u32 rps_cpu_mask __read_mostly;
-EXPORT_SYMBOL(rps_cpu_mask);
-
struct static_key_false rps_needed __read_mostly;
EXPORT_SYMBOL(rps_needed);
struct static_key_false rfs_needed __read_mostly;
@@ -4547,7 +4541,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
if (!hash)
goto done;
- sock_flow_table = rcu_dereference(rps_sock_flow_table);
+ sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table);
if (flow_table && sock_flow_table) {
struct rps_dev_flow *rflow;
u32 next_cpu;
@@ -4557,10 +4551,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
* This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow().
*/
ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]);
- if ((ident ^ hash) & ~rps_cpu_mask)
+ if ((ident ^ hash) & ~net_hotdata.rps_cpu_mask)
goto try_rps;
- next_cpu = ident & rps_cpu_mask;
+ next_cpu = ident & net_hotdata.rps_cpu_mask;
/* OK, now we know there is a match,
* we can look at the local (per receive queue) flow table
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 4b93e27404e83a5b3afaa23ebd18cf55b1fdc6e8..6973dda3abda63e0924efa4b6b7026786e8bfb4f 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -140,7 +140,8 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
mutex_lock(&sock_flow_mutex);
- orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
+ orig_sock_table = rcu_dereference_protected(
+ net_hotdata.rps_sock_flow_table,
lockdep_is_held(&sock_flow_mutex));
size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
@@ -161,7 +162,8 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
mutex_unlock(&sock_flow_mutex);
return -ENOMEM;
}
- rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1;
+ net_hotdata.rps_cpu_mask =
+ roundup_pow_of_two(nr_cpu_ids) - 1;
sock_table->mask = size - 1;
} else
sock_table = orig_sock_table;
@@ -172,7 +174,8 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
sock_table = NULL;
if (sock_table != orig_sock_table) {
- rcu_assign_pointer(rps_sock_flow_table, sock_table);
+ rcu_assign_pointer(net_hotdata.rps_sock_flow_table,
+ sock_table);
if (sock_table) {
static_branch_inc(&rps_needed);
static_branch_inc(&rfs_needed);
--
2.44.0.278.ge034bb2e1d-goog
* Re: [PATCH net-next 00/18] net: group together hot data
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (17 preceding siblings ...)
2024-03-05 16:04 ` [PATCH net-next 18/18] net: move rps_sock_flow_table to net_hotdata Eric Dumazet
@ 2024-03-05 18:28 ` Soheil Hassas Yeganeh
2024-03-06 4:39 ` David Ahern
19 siblings, 0 replies; 24+ messages in thread
From: Soheil Hassas Yeganeh @ 2024-03-05 18:28 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, netdev,
David Ahern, Willem de Bruijn, Neal Cardwell, eric.dumazet
On Tue, Mar 5, 2024 at 11:04 AM Eric Dumazet <edumazet@google.com> wrote:
>
> While our recent structure reorganizations were focused
> on increasing max throughput, there is still an
> area where improvements are much needed.
>
> In many cases, a cpu handles one packet at a time,
> instead of a nice batch.
>
> Hardware interrupt.
> -> Software interrupt.
> -> Network/Protocol stacks.
>
> If the cpu was idle or busy in other layers,
> it has to pull many cache lines.
>
> This series adds a new net_hotdata structure, where
> some critical (and read-mostly) data used in
> rx and tx path is packed in a small number of cache lines.
>
> Synthetic benchmarks will not see much difference,
> but latency of a single packet should improve.
>
> net_hotdata current size on 64bit is 416 bytes,
> but might grow in the future.
>
> Also move RPS definitions to a new include file.
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Nice series!
> Eric Dumazet (18):
> net: introduce struct net_hotdata
> net: move netdev_budget and netdev_budget to net_hotdata
> net: move netdev_tstamp_prequeue into net_hotdata
> net: move ptype_all into net_hotdata
> net: move netdev_max_backlog to net_hotdata
> net: move ip_packet_offload and ipv6_packet_offload to net_hotdata
> net: move tcpv4_offload and tcpv6_offload to net_hotdata
> net: move dev_tx_weight to net_hotdata
> net: move dev_rx_weight to net_hotdata
> net: move skbuff_cache(s) to net_hotdata
> udp: move udpv4_offload and udpv6_offload to net_hotdata
> ipv6: move tcpv6_protocol and udpv6_protocol to net_hotdata
> inet: move tcp_protocol and udp_protocol to net_hotdata
> inet: move inet_ehash_secret and udp_ehash_secret into net_hotdata
> ipv6: move inet6_ehash_secret and udp6_ehash_secret into net_hotdata
> ipv6: move tcp_ipv6_hash_secret and udp_ipv6_hash_secret to
> net_hotdata
> net: introduce include/net/rps.h
> net: move rps_sock_flow_table to net_hotdata
>
> drivers/net/ethernet/intel/ice/ice_arfs.c | 1 +
> .../net/ethernet/mellanox/mlx4/en_netdev.c | 1 +
> .../net/ethernet/mellanox/mlx5/core/en_arfs.c | 1 +
> drivers/net/ethernet/sfc/rx_common.c | 1 +
> drivers/net/ethernet/sfc/siena/rx_common.c | 1 +
> drivers/net/tun.c | 1 +
> include/linux/netdevice.h | 88 ------------
> include/linux/skbuff.h | 1 -
> include/net/gro.h | 5 +-
> include/net/hotdata.h | 52 ++++++++
> include/net/protocol.h | 3 +
> include/net/rps.h | 125 ++++++++++++++++++
> include/net/sock.h | 35 -----
> kernel/bpf/cpumap.c | 4 +-
> net/bpf/test_run.c | 4 +-
> net/core/Makefile | 1 +
> net/core/dev.c | 58 +++-----
> net/core/dev.h | 3 -
> net/core/gro.c | 15 +--
> net/core/gro_cells.c | 3 +-
> net/core/gso.c | 4 +-
> net/core/hotdata.c | 22 +++
> net/core/net-procfs.c | 7 +-
> net/core/net-sysfs.c | 1 +
> net/core/skbuff.c | 44 +++---
> net/core/sysctl_net_core.c | 25 ++--
> net/core/xdp.c | 5 +-
> net/ipv4/af_inet.c | 49 +++----
> net/ipv4/inet_hashtables.c | 3 +-
> net/ipv4/tcp.c | 1 +
> net/ipv4/tcp_offload.c | 17 ++-
> net/ipv4/udp.c | 2 -
> net/ipv4/udp_offload.c | 17 ++-
> net/ipv6/af_inet6.c | 1 +
> net/ipv6/inet6_hashtables.c | 8 +-
> net/ipv6/ip6_offload.c | 18 +--
> net/ipv6/tcp_ipv6.c | 17 +--
> net/ipv6/tcpv6_offload.c | 16 +--
> net/ipv6/udp.c | 19 ++-
> net/ipv6/udp_offload.c | 21 ++-
> net/sched/sch_generic.c | 3 +-
> net/sctp/socket.c | 1 +
> net/xfrm/espintcp.c | 4 +-
> net/xfrm/xfrm_input.c | 3 +-
> 44 files changed, 391 insertions(+), 320 deletions(-)
> create mode 100644 include/net/hotdata.h
> create mode 100644 include/net/rps.h
> create mode 100644 net/core/hotdata.c
>
> --
> 2.44.0.278.ge034bb2e1d-goog
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH net-next 00/18] net: group together hot data
2024-03-05 16:03 [PATCH net-next 00/18] net: group together hot data Eric Dumazet
` (18 preceding siblings ...)
2024-03-05 18:28 ` [PATCH net-next 00/18] net: group together hot data Soheil Hassas Yeganeh
@ 2024-03-06 4:39 ` David Ahern
19 siblings, 0 replies; 24+ messages in thread
From: David Ahern @ 2024-03-06 4:39 UTC (permalink / raw)
To: Eric Dumazet, David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, Willem de Bruijn, Soheil Hassas Yeganeh, Neal Cardwell,
eric.dumazet
On 3/5/24 9:03 AM, Eric Dumazet wrote:
> While our recent structure reorganizations were focused
> on increasing max throughput, there is still an
> area where improvements are much needed.
>
> In many cases, a cpu handles one packet at a time,
> instead of a nice batch.
>
> Hardware interrupt.
> -> Software interrupt.
> -> Network/Protocol stacks.
>
> If the cpu was idle or busy in other layers,
> it has to pull many cache lines.
>
> This series adds a new net_hotdata structure, where
> some critical (and read-mostly) data used in
> rx and tx path is packed in a small number of cache lines.
>
> Synthetic benchmarks will not see much difference,
> but latency of a single packet should improve.
>
> net_hotdata current size on 64bit is 416 bytes,
> but might grow in the future.
>
> Also move RPS definitions to a new include file.
>
Interesting patch set. For the set:
Reviewed-by: David Ahern <dsahern@kernel.org>
^ permalink raw reply [flat|nested] 24+ messages in thread