All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: Eric Dumazet <edumazet@google.com>
Cc: LKML <linux-kernel@vger.kernel.org>,
	Linus Torvalds <torvalds@linuxfoundation.org>,
	x86@kernel.org, Wangyang Guo <wangyang.guo@intel.com>,
	Arjan van De Ven <arjan@linux.intel.com>,
	"David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	netdev@vger.kernel.org, Will Deacon <will@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Boqun Feng <boqun.feng@gmail.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Marc Zyngier <maz@kernel.org>
Subject: Re: [patch 1/3] net: dst: Prevent false sharing vs. dst_entry::__refcnt
Date: Thu, 02 Mar 2023 00:12:23 +0100	[thread overview]
Message-ID: <87edq8kqw8.ffs@tglx> (raw)
In-Reply-To: <875yblmq83.ffs@tglx>

On Tue, Feb 28 2023 at 22:31, Thomas Gleixner wrote:
> On Tue, Feb 28 2023 at 16:17, Eric Dumazet wrote:
>> Instead of mere pads, add some unions, and let rt6i_uncached/rt6i_uncached_list
>> use them.
>
> If I understand correctly, you suggest to move
>
>    rt6_info::rt6i_uncached[_list], rtable::rt_uncached[_list]
>    
> into struct dst_entry and fixup the usage sites, right?
>
> I don't see why that would need a union. dst_entry::rt_uncached[_list]
> would work for both, no?

So I came up with the below.

rt6_info shrinks from 232 to 224 bytes. rtable size is unchanged

Thanks,

        tglx
---

--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -69,15 +69,28 @@ struct dst_entry {
 #endif
 	int			__use;
 	unsigned long		lastuse;
-	struct lwtunnel_state   *lwtstate;
 	struct rcu_head		rcu_head;
 	short			error;
 	short			__pad;
 	__u32			tclassid;
 #ifndef CONFIG_64BIT
+	struct lwtunnel_state   *lwtstate;
 	atomic_t		__refcnt;	/* 32-bit offset 64 */
 #endif
 	netdevice_tracker	dev_tracker;
+
+	/*
+	 * Used by rtable and rt6_info. Moves lwtstate into the next cache
+	 * line on 64bit so that lwtstate does not cause false sharing with
+	 * __refcnt under contention of __refcnt. This also puts the
+	 * frequently accessed members of rtable and rt6_info out of the
+	 * __refcnt cache line.
+	 */
+	struct list_head	rt_uncached;
+	struct uncached_list	*rt_uncached_list;
+#ifdef CONFIG_64BIT
+	struct lwtunnel_state   *lwtstate;
+#endif
 };
 
 struct dst_metrics {
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -217,9 +217,6 @@ struct rt6_info {
 	struct inet6_dev		*rt6i_idev;
 	u32				rt6i_flags;
 
-	struct list_head		rt6i_uncached;
-	struct uncached_list		*rt6i_uncached_list;
-
 	/* more non-fragment space at head required */
 	unsigned short			rt6i_nfheader_len;
 };
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -104,7 +104,7 @@ static inline struct dst_entry *ip6_rout
 static inline void ip6_rt_put_flags(struct rt6_info *rt, int flags)
 {
 	if (!(flags & RT6_LOOKUP_F_DST_NOREF) ||
-	    !list_empty(&rt->rt6i_uncached))
+	    !list_empty(&rt->dst.rt_uncached))
 		ip6_rt_put(rt);
 }
 
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -81,9 +81,6 @@ struct rtable {
 	/* Miscellaneous cached information */
 	u32			rt_mtu_locked:1,
 				rt_pmtu:31;
-
-	struct list_head	rt_uncached;
-	struct uncached_list	*rt_uncached_list;
 };
 
 static inline bool rt_is_input_route(const struct rtable *rt)
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1508,20 +1508,20 @@ void rt_add_uncached_list(struct rtable
 {
 	struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
 
-	rt->rt_uncached_list = ul;
+	rt->dst.rt_uncached_list = ul;
 
 	spin_lock_bh(&ul->lock);
-	list_add_tail(&rt->rt_uncached, &ul->head);
+	list_add_tail(&rt->dst.rt_uncached, &ul->head);
 	spin_unlock_bh(&ul->lock);
 }
 
 void rt_del_uncached_list(struct rtable *rt)
 {
-	if (!list_empty(&rt->rt_uncached)) {
-		struct uncached_list *ul = rt->rt_uncached_list;
+	if (!list_empty(&rt->dst.rt_uncached)) {
+		struct uncached_list *ul = rt->dst.rt_uncached_list;
 
 		spin_lock_bh(&ul->lock);
-		list_del_init(&rt->rt_uncached);
+		list_del_init(&rt->dst.rt_uncached);
 		spin_unlock_bh(&ul->lock);
 	}
 }
@@ -1546,13 +1546,13 @@ void rt_flush_dev(struct net_device *dev
 			continue;
 
 		spin_lock_bh(&ul->lock);
-		list_for_each_entry_safe(rt, safe, &ul->head, rt_uncached) {
+		list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) {
 			if (rt->dst.dev != dev)
 				continue;
 			rt->dst.dev = blackhole_netdev;
 			netdev_ref_replace(dev, blackhole_netdev,
 					   &rt->dst.dev_tracker, GFP_ATOMIC);
-			list_move(&rt->rt_uncached, &ul->quarantine);
+			list_move(&rt->dst.rt_uncached, &ul->quarantine);
 		}
 		spin_unlock_bh(&ul->lock);
 	}
@@ -1644,7 +1644,7 @@ struct rtable *rt_dst_alloc(struct net_d
 		rt->rt_uses_gateway = 0;
 		rt->rt_gw_family = 0;
 		rt->rt_gw4 = 0;
-		INIT_LIST_HEAD(&rt->rt_uncached);
+		INIT_LIST_HEAD(&rt->dst.rt_uncached);
 
 		rt->dst.output = ip_output;
 		if (flags & RTCF_LOCAL)
@@ -1675,7 +1675,7 @@ struct rtable *rt_dst_clone(struct net_d
 			new_rt->rt_gw4 = rt->rt_gw4;
 		else if (rt->rt_gw_family == AF_INET6)
 			new_rt->rt_gw6 = rt->rt_gw6;
-		INIT_LIST_HEAD(&new_rt->rt_uncached);
+		INIT_LIST_HEAD(&new_rt->dst.rt_uncached);
 
 		new_rt->dst.input = rt->dst.input;
 		new_rt->dst.output = rt->dst.output;
@@ -2859,7 +2859,7 @@ struct dst_entry *ipv4_blackhole_route(s
 		else if (rt->rt_gw_family == AF_INET6)
 			rt->rt_gw6 = ort->rt_gw6;
 
-		INIT_LIST_HEAD(&rt->rt_uncached);
+		INIT_LIST_HEAD(&rt->dst.rt_uncached);
 	}
 
 	dst_release(dst_orig);
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -91,7 +91,7 @@ static int xfrm4_fill_dst(struct xfrm_ds
 		xdst->u.rt.rt_gw6 = rt->rt_gw6;
 	xdst->u.rt.rt_pmtu = rt->rt_pmtu;
 	xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
-	INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
+	INIT_LIST_HEAD(&xdst->u.rt.dst.rt_uncached);
 	rt_add_uncached_list(&xdst->u.rt);
 
 	return 0;
@@ -121,7 +121,7 @@ static void xfrm4_dst_destroy(struct dst
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 
 	dst_destroy_metrics_generic(dst);
-	if (xdst->u.rt.rt_uncached_list)
+	if (xdst->u.rt.dst.rt_uncached_list)
 		rt_del_uncached_list(&xdst->u.rt);
 	xfrm_dst_destroy(xdst);
 }
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -139,20 +139,20 @@ void rt6_uncached_list_add(struct rt6_in
 {
 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 
-	rt->rt6i_uncached_list = ul;
+	rt->dst.rt_uncached_list = ul;
 
 	spin_lock_bh(&ul->lock);
-	list_add_tail(&rt->rt6i_uncached, &ul->head);
+	list_add_tail(&rt->dst.rt_uncached, &ul->head);
 	spin_unlock_bh(&ul->lock);
 }
 
 void rt6_uncached_list_del(struct rt6_info *rt)
 {
-	if (!list_empty(&rt->rt6i_uncached)) {
-		struct uncached_list *ul = rt->rt6i_uncached_list;
+	if (!list_empty(&rt->dst.rt_uncached)) {
+		struct uncached_list *ul = rt->dst.rt_uncached_list;
 
 		spin_lock_bh(&ul->lock);
-		list_del_init(&rt->rt6i_uncached);
+		list_del_init(&rt->dst.rt_uncached);
 		spin_unlock_bh(&ul->lock);
 	}
 }
@@ -169,7 +169,7 @@ static void rt6_uncached_list_flush_dev(
 			continue;
 
 		spin_lock_bh(&ul->lock);
-		list_for_each_entry_safe(rt, safe, &ul->head, rt6i_uncached) {
+		list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) {
 			struct inet6_dev *rt_idev = rt->rt6i_idev;
 			struct net_device *rt_dev = rt->dst.dev;
 			bool handled = false;
@@ -188,7 +188,7 @@ static void rt6_uncached_list_flush_dev(
 				handled = true;
 			}
 			if (handled)
-				list_move(&rt->rt6i_uncached,
+				list_move(&rt->dst.rt_uncached,
 					  &ul->quarantine);
 		}
 		spin_unlock_bh(&ul->lock);
@@ -334,7 +334,7 @@ static const struct rt6_info ip6_blk_hol
 static void rt6_info_init(struct rt6_info *rt)
 {
 	memset_after(rt, 0, dst);
-	INIT_LIST_HEAD(&rt->rt6i_uncached);
+	INIT_LIST_HEAD(&rt->dst.rt_uncached);
 }
 
 /* allocate dst with ip6_dst_ops */
@@ -2638,7 +2638,7 @@ struct dst_entry *ip6_route_output_flags
 	dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
 	rt6 = (struct rt6_info *)dst;
 	/* For dst cached in uncached_list, refcnt is already taken. */
-	if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
+	if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) {
 		dst = &net->ipv6.ip6_null_entry->dst;
 		dst_hold(dst);
 	}
@@ -2748,7 +2748,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry
 	from = rcu_dereference(rt->from);
 
 	if (from && (rt->rt6i_flags & RTF_PCPU ||
-	    unlikely(!list_empty(&rt->rt6i_uncached))))
+	    unlikely(!list_empty(&rt->dst.rt_uncached))))
 		dst_ret = rt6_dst_from_check(rt, from, cookie);
 	else
 		dst_ret = rt6_check(rt, from, cookie);
@@ -6483,7 +6483,7 @@ static int __net_init ip6_route_net_init
 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
 			 ip6_template_metrics, true);
-	INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->rt6i_uncached);
+	INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->dst.rt_uncached);
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	net->ipv6.fib6_has_custom_rules = false;
@@ -6495,7 +6495,7 @@ static int __net_init ip6_route_net_init
 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
 			 ip6_template_metrics, true);
-	INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->rt6i_uncached);
+	INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->dst.rt_uncached);
 
 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -6505,7 +6505,7 @@ static int __net_init ip6_route_net_init
 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
 			 ip6_template_metrics, true);
-	INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached);
+	INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->dst.rt_uncached);
 #ifdef CONFIG_IPV6_SUBTREES
 	net->ipv6.fib6_routes_require_src = 0;
 #endif
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -89,7 +89,7 @@ static int xfrm6_fill_dst(struct xfrm_ds
 	xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
 	xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
 	xdst->u.rt6.rt6i_src = rt->rt6i_src;
-	INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached);
+	INIT_LIST_HEAD(&xdst->u.rt6.dst.rt_uncached);
 	rt6_uncached_list_add(&xdst->u.rt6);
 
 	return 0;
@@ -121,7 +121,7 @@ static void xfrm6_dst_destroy(struct dst
 	if (likely(xdst->u.rt6.rt6i_idev))
 		in6_dev_put(xdst->u.rt6.rt6i_idev);
 	dst_destroy_metrics_generic(dst);
-	if (xdst->u.rt6.rt6i_uncached_list)
+	if (xdst->u.rt6.dst.rt_uncached_list)
 		rt6_uncached_list_del(&xdst->u.rt6);
 	xfrm_dst_destroy(xdst);
 }

  reply	other threads:[~2023-03-01 23:12 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-28 14:33 [patch 0/3] net, refcount: Address dst_entry reference count scalability issues Thomas Gleixner
2023-02-28 14:33 ` [patch 1/3] net: dst: Prevent false sharing vs. dst_entry::__refcnt Thomas Gleixner
2023-02-28 15:17   ` Eric Dumazet
2023-02-28 21:31     ` Thomas Gleixner
2023-03-01 23:12       ` Thomas Gleixner [this message]
2023-02-28 14:33 ` [patch 2/3] atomics: Provide rcuref - scalable reference counting Thomas Gleixner
2023-03-01  0:42   ` Linus Torvalds
2023-03-01  1:07     ` Linus Torvalds
2023-03-01 11:09     ` Thomas Gleixner
2023-03-02  1:05       ` Thomas Gleixner
2023-03-02  1:29         ` Randy Dunlap
2023-03-02 19:36         ` Linus Torvalds
2023-02-28 14:33 ` [patch 3/3] net: dst: Switch to rcuref_t " Thomas Gleixner
2023-02-28 15:07 ` [patch 0/3] net, refcount: Address dst_entry reference count scalability issues Eric Dumazet
2023-02-28 16:38   ` Thomas Gleixner
2023-02-28 16:59     ` Eric Dumazet
2023-03-01  1:00       ` Thomas Gleixner
2023-03-01  3:17         ` Jakub Kicinski
2023-03-01 10:40           ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87edq8kqw8.ffs@tglx \
    --to=tglx@linutronix.de \
    --cc=arjan@linux.intel.com \
    --cc=boqun.feng@gmail.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=maz@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=peterz@infradead.org \
    --cc=torvalds@linuxfoundation.org \
    --cc=wangyang.guo@intel.com \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.