netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 0/4] inetpeer: reduce false sharing and atomic operations
@ 2024-12-13 13:02 Eric Dumazet
  2024-12-13 13:02 ` [PATCH net-next 1/4] inetpeer: remove create argument of inet_getpeer_v[46]() Eric Dumazet
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Eric Dumazet @ 2024-12-13 13:02 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Simon Horman, David Ahern, Kuniyuki Iwashima,
	eric.dumazet, Eric Dumazet

After commit 8c2bd38b95f7 ("icmp: change the order of rate limits"),
there is a risk that a host receiving packets from a unique
source targeting closed ports is using a common inet_peer structure
from many cpus.

All these cpus have to acquire/release a refcount and update
the inet_peer timestamp (p->dtime).

Switch to pure RCU to avoid changing the refcount, and update
p->dtime only once per jiffy.

Tested:
  DUT : 128 cores, 32 hw rx queues.
  receiving 8,400,000 UDP packets per second, targeting closed ports.

Before the series:
- napi poll cannot keep up, NIC drops 1,200,000 packets
  per second.
- We use 20 % of cpu cycles

After this series:
- All packets are received (no more hw drops)
- We use 12 % of cpu cycles.

Eric Dumazet (4):
  inetpeer: remove create argument of inet_getpeer_v[46]()
  inetpeer: remove create argument of inet_getpeer()
  inetpeer: update inetpeer timestamp in inet_getpeer()
  inetpeer: do not get a refcount in inet_getpeer()

 include/net/inetpeer.h | 12 +++++-------
 net/ipv4/icmp.c        |  6 +++---
 net/ipv4/inetpeer.c    | 29 ++++++++---------------------
 net/ipv4/ip_fragment.c | 15 ++++++++++-----
 net/ipv4/route.c       | 17 +++++++++--------
 net/ipv6/icmp.c        |  6 +++---
 net/ipv6/ip6_output.c  |  6 +++---
 net/ipv6/ndisc.c       |  8 +++++---
 8 files changed, 46 insertions(+), 53 deletions(-)

-- 
2.47.1.613.gc27f4b7a9f-goog


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH net-next 1/4] inetpeer: remove create argument of inet_getpeer_v[46]()
  2024-12-13 13:02 [PATCH net-next 0/4] inetpeer: reduce false sharing and atomic operations Eric Dumazet
@ 2024-12-13 13:02 ` Eric Dumazet
  2024-12-13 13:02 ` [PATCH net-next 2/4] inetpeer: remove create argument of inet_getpeer() Eric Dumazet
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 11+ messages in thread
From: Eric Dumazet @ 2024-12-13 13:02 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Simon Horman, David Ahern, Kuniyuki Iwashima,
	eric.dumazet, Eric Dumazet

All callers of inet_getpeer_v4() and inet_getpeer_v6()
want to create an inetpeer.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/inetpeer.h | 9 ++++-----
 net/ipv4/icmp.c        | 2 +-
 net/ipv4/ip_fragment.c | 2 +-
 net/ipv4/route.c       | 4 ++--
 net/ipv6/icmp.c        | 2 +-
 net/ipv6/ip6_output.c  | 2 +-
 net/ipv6/ndisc.c       | 2 +-
 7 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 74ff688568a0c6559946a9ae763d5c9822f1d112..6f51f81d6cb19c623e9b347dbdbbd8d849848f6e 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -101,25 +101,24 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 
 static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base,
 						__be32 v4daddr,
-						int vif, int create)
+						int vif)
 {
 	struct inetpeer_addr daddr;
 
 	daddr.a4.addr = v4daddr;
 	daddr.a4.vif = vif;
 	daddr.family = AF_INET;
-	return inet_getpeer(base, &daddr, create);
+	return inet_getpeer(base, &daddr, 1);
 }
 
 static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base,
-						const struct in6_addr *v6daddr,
-						int create)
+						const struct in6_addr *v6daddr)
 {
 	struct inetpeer_addr daddr;
 
 	daddr.a6 = *v6daddr;
 	daddr.family = AF_INET6;
-	return inet_getpeer(base, &daddr, create);
+	return inet_getpeer(base, &daddr, 1);
 }
 
 static inline int inetpeer_addr_cmp(const struct inetpeer_addr *a,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 963a89ae9c26e8b462de57e4af981c6c46061052..5eeb9f569a706cf2766d74bcf1a667c8930804f2 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -322,7 +322,7 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 		goto out;
 
 	vif = l3mdev_master_ifindex(dst->dev);
-	peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
+	peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif);
 	rc = inet_peer_xrlim_allow(peer,
 				   READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
 	if (peer)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 07036a2943c19f13f2d6d1d77cb8123867575b50..46e1171299f22ccf0b201eabbff5d3279a0703d8 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -89,7 +89,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
 	q->key.v4 = *key;
 	qp->ecn = 0;
 	qp->peer = q->fqdir->max_dist ?
-		inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
+		inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif) :
 		NULL;
 }
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0fbec350961862f76b7eab332539472fed5a5286..297a9939c6e74beffc592dbdd7266281fe842440 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -873,7 +873,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	rcu_read_unlock();
 
 	net = dev_net(rt->dst.dev);
-	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
+	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif);
 	if (!peer) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
 			  rt_nexthop(rt, ip_hdr(skb)->daddr));
@@ -976,7 +976,7 @@ static int ip_error(struct sk_buff *skb)
 	}
 
 	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
-			       l3mdev_master_ifindex(skb->dev), 1);
+			       l3mdev_master_ifindex(skb->dev));
 
 	send = true;
 	if (peer) {
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 071b0bc1179d81b18c340ce415cef21e02a30cd7..4593e3992c67b84e3a10f30be28762974094d21f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -222,7 +222,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
 		if (rt->rt6i_dst.plen < 128)
 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 
-		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
+		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
 		res = inet_peer_xrlim_allow(peer, tmo);
 		if (peer)
 			inet_putpeer(peer);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3d672dea9f56284e7a8ebabec037e04e7f3d19f4..2cbcfe70654b5cd90c433a24c47ef5496c604d0d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -613,7 +613,7 @@ int ip6_forward(struct sk_buff *skb)
 		else
 			target = &hdr->daddr;
 
-		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
+		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr);
 
 		/* Limit redirects both by destination (here)
 		   and by source (inside ndisc_send_redirect)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index aba94a34867379000e958538d880799c2d0c1476..f113554d13325453cd04ce4e5686d837943e96ff 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1731,7 +1731,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 			  "Redirect: destination is not a neighbour\n");
 		goto release;
 	}
-	peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1);
+	peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr);
 	ret = inet_peer_xrlim_allow(peer, 1*HZ);
 	if (peer)
 		inet_putpeer(peer);
-- 
2.47.1.613.gc27f4b7a9f-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH net-next 2/4] inetpeer: remove create argument of inet_getpeer()
  2024-12-13 13:02 [PATCH net-next 0/4] inetpeer: reduce false sharing and atomic operations Eric Dumazet
  2024-12-13 13:02 ` [PATCH net-next 1/4] inetpeer: remove create argument of inet_getpeer_v[46]() Eric Dumazet
@ 2024-12-13 13:02 ` Eric Dumazet
  2024-12-13 20:16   ` Simon Horman
  2024-12-14 14:34   ` kernel test robot
  2024-12-13 13:02 ` [PATCH net-next 3/4] inetpeer: update inetpeer timestamp in inet_getpeer() Eric Dumazet
  2024-12-13 13:02 ` [PATCH net-next 4/4] inetpeer: do not get a refcount " Eric Dumazet
  3 siblings, 2 replies; 11+ messages in thread
From: Eric Dumazet @ 2024-12-13 13:02 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Simon Horman, David Ahern, Kuniyuki Iwashima,
	eric.dumazet, Eric Dumazet

All callers of inet_getpeer() want to create an inetpeer.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/inetpeer.h | 7 +++----
 net/ipv4/inetpeer.c    | 9 ++-------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 6f51f81d6cb19c623e9b347dbdbbd8d849848f6e..f475757daafba998a10c815d0178c98d2bf1ae43 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -96,8 +96,7 @@ static inline struct in6_addr *inetpeer_get_addr_v6(struct inetpeer_addr *iaddr)
 
 /* can be called with or without local BH being disabled */
 struct inet_peer *inet_getpeer(struct inet_peer_base *base,
-			       const struct inetpeer_addr *daddr,
-			       int create);
+			       const struct inetpeer_addr *daddr);
 
 static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base,
 						__be32 v4daddr,
@@ -108,7 +107,7 @@ static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base,
 	daddr.a4.addr = v4daddr;
 	daddr.a4.vif = vif;
 	daddr.family = AF_INET;
-	return inet_getpeer(base, &daddr, 1);
+	return inet_getpeer(base, &daddr);
 }
 
 static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base,
@@ -118,7 +117,7 @@ static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base,
 
 	daddr.a6 = *v6daddr;
 	daddr.family = AF_INET6;
-	return inet_getpeer(base, &daddr, 1);
+	return inet_getpeer(base, &daddr);
 }
 
 static inline int inetpeer_addr_cmp(const struct inetpeer_addr *a,
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 5ab56f4cb529769d4edb07261c08d61ff96f0c0f..58d2805b046d00cd509e2d2343abfb8eacfbdde7 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -169,8 +169,7 @@ static void inet_peer_gc(struct inet_peer_base *base,
 }
 
 struct inet_peer *inet_getpeer(struct inet_peer_base *base,
-			       const struct inetpeer_addr *daddr,
-			       int create)
+			       const struct inetpeer_addr *daddr)
 {
 	struct inet_peer *p, *gc_stack[PEER_MAX_GC];
 	struct rb_node **pp, *parent;
@@ -189,10 +188,6 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 	if (p)
 		return p;
 
-	/* If no writer did a change during our lookup, we can return early. */
-	if (!create && !invalidated)
-		return NULL;
-
 	/* retry an exact lookup, taking the lock before.
 	 * At least, nodes should be hot in our cache.
 	 */
@@ -201,7 +196,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 
 	gc_cnt = 0;
 	p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp);
-	if (!p && create) {
+	if (!p) {
 		p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
 		if (p) {
 			p->daddr = *daddr;
-- 
2.47.1.613.gc27f4b7a9f-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH net-next 3/4] inetpeer: update inetpeer timestamp in inet_getpeer()
  2024-12-13 13:02 [PATCH net-next 0/4] inetpeer: reduce false sharing and atomic operations Eric Dumazet
  2024-12-13 13:02 ` [PATCH net-next 1/4] inetpeer: remove create argument of inet_getpeer_v[46]() Eric Dumazet
  2024-12-13 13:02 ` [PATCH net-next 2/4] inetpeer: remove create argument of inet_getpeer() Eric Dumazet
@ 2024-12-13 13:02 ` Eric Dumazet
  2024-12-15 15:34   ` Ido Schimmel
  2024-12-13 13:02 ` [PATCH net-next 4/4] inetpeer: do not get a refcount " Eric Dumazet
  3 siblings, 1 reply; 11+ messages in thread
From: Eric Dumazet @ 2024-12-13 13:02 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Simon Horman, David Ahern, Kuniyuki Iwashima,
	eric.dumazet, Eric Dumazet

inet_putpeer() will be removed in the following patch,
because we will no longer use refcounts.

Update inetpeer timetamp (p->dtime) at lookup time.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/inetpeer.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 58d2805b046d00cd509e2d2343abfb8eacfbdde7..67827c9bf2c8f3ba842ff1dc3b7e1fc2976e6ef1 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -95,6 +95,7 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
 {
 	struct rb_node **pp, *parent, *next;
 	struct inet_peer *p;
+	u32 now;
 
 	pp = &base->rb_root.rb_node;
 	parent = NULL;
@@ -110,6 +111,9 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
 		if (cmp == 0) {
 			if (!refcount_inc_not_zero(&p->refcnt))
 				break;
+			now = jiffies;
+			if (READ_ONCE(p->dtime) != now)
+				WRITE_ONCE(p->dtime, now);
 			return p;
 		}
 		if (gc_stack) {
@@ -150,9 +154,6 @@ static void inet_peer_gc(struct inet_peer_base *base,
 	for (i = 0; i < gc_cnt; i++) {
 		p = gc_stack[i];
 
-		/* The READ_ONCE() pairs with the WRITE_ONCE()
-		 * in inet_putpeer()
-		 */
 		delta = (__u32)jiffies - READ_ONCE(p->dtime);
 
 		if (delta < ttl || !refcount_dec_if_one(&p->refcnt))
@@ -226,11 +227,6 @@ EXPORT_SYMBOL_GPL(inet_getpeer);
 
 void inet_putpeer(struct inet_peer *p)
 {
-	/* The WRITE_ONCE() pairs with itself (we run lockless)
-	 * and the READ_ONCE() in inet_peer_gc()
-	 */
-	WRITE_ONCE(p->dtime, (__u32)jiffies);
-
 	if (refcount_dec_and_test(&p->refcnt))
 		kfree_rcu(p, rcu);
 }
-- 
2.47.1.613.gc27f4b7a9f-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH net-next 4/4] inetpeer: do not get a refcount in inet_getpeer()
  2024-12-13 13:02 [PATCH net-next 0/4] inetpeer: reduce false sharing and atomic operations Eric Dumazet
                   ` (2 preceding siblings ...)
  2024-12-13 13:02 ` [PATCH net-next 3/4] inetpeer: update inetpeer timestamp in inet_getpeer() Eric Dumazet
@ 2024-12-13 13:02 ` Eric Dumazet
  2024-12-15 15:48   ` Ido Schimmel
  3 siblings, 1 reply; 11+ messages in thread
From: Eric Dumazet @ 2024-12-13 13:02 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, Simon Horman, David Ahern, Kuniyuki Iwashima,
	eric.dumazet, Eric Dumazet

All inet_getpeer() callers except ip4_frag_init() don't need
to acquire a permanent refcount on the inetpeer.

They can switch to full RCU protection.

Move the refcount_inc_not_zero() into ip4_frag_init(),
so that all the other callers no longer have to
perform a pair of expensive atomic operations on
a possibly contended cache line.

inet_putpeer() no longer needs to be exported.

After this patch, my DUT can receive 8,400,000 UDP packets
per second targeting closed ports, using 50% fewer cpu cycles
than before.

Fixes: 8c2bd38b95f7 ("icmp: change the order of rate limits")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/icmp.c        |  4 ++--
 net/ipv4/inetpeer.c    |  8 ++------
 net/ipv4/ip_fragment.c | 15 ++++++++++-----
 net/ipv4/route.c       | 13 +++++++------
 net/ipv6/icmp.c        |  4 ++--
 net/ipv6/ip6_output.c  |  4 ++--
 net/ipv6/ndisc.c       |  6 ++++--
 7 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 5eeb9f569a706cf2766d74bcf1a667c8930804f2..7a1b1af2edcae0b0648ef3c3411b4ef36e6d9b14 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -322,11 +322,11 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 		goto out;
 
 	vif = l3mdev_master_ifindex(dst->dev);
+	rcu_read_lock();
 	peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif);
 	rc = inet_peer_xrlim_allow(peer,
 				   READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
-	if (peer)
-		inet_putpeer(peer);
+	rcu_read_unlock();
 out:
 	if (!rc)
 		__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 67827c9bf2c8f3ba842ff1dc3b7e1fc2976e6ef1..b025eaba501305635ae46672ff3c7de75c4fcc08 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -109,8 +109,6 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
 		p = rb_entry(parent, struct inet_peer, rb_node);
 		cmp = inetpeer_addr_cmp(daddr, &p->daddr);
 		if (cmp == 0) {
-			if (!refcount_inc_not_zero(&p->refcnt))
-				break;
 			now = jiffies;
 			if (READ_ONCE(p->dtime) != now)
 				WRITE_ONCE(p->dtime, now);
@@ -169,6 +167,7 @@ static void inet_peer_gc(struct inet_peer_base *base,
 	}
 }
 
+/* Must be called under RCU : No refcount change is done here. */
 struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 			       const struct inetpeer_addr *daddr)
 {
@@ -180,11 +179,9 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 	/* Attempt a lockless lookup first.
 	 * Because of a concurrent writer, we might not find an existing entry.
 	 */
-	rcu_read_lock();
 	seq = read_seqbegin(&base->lock);
 	p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
 	invalidated = read_seqretry(&base->lock, seq);
-	rcu_read_unlock();
 
 	if (p)
 		return p;
@@ -202,7 +199,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 		if (p) {
 			p->daddr = *daddr;
 			p->dtime = (__u32)jiffies;
-			refcount_set(&p->refcnt, 2);
+			refcount_set(&p->refcnt, 1);
 			atomic_set(&p->rid, 0);
 			p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 			p->rate_tokens = 0;
@@ -230,7 +227,6 @@ void inet_putpeer(struct inet_peer *p)
 	if (refcount_dec_and_test(&p->refcnt))
 		kfree_rcu(p, rcu);
 }
-EXPORT_SYMBOL_GPL(inet_putpeer);
 
 /*
  *	Check transmit rate limitation for given message.
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 46e1171299f22ccf0b201eabbff5d3279a0703d8..7a435746a22dee9f11c0dc732a8b5a7724f4eea3 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -82,15 +82,20 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
 {
 	struct ipq *qp = container_of(q, struct ipq, q);
-	struct net *net = q->fqdir->net;
-
 	const struct frag_v4_compare_key *key = a;
+	struct net *net = q->fqdir->net;
+	struct inet_peer *p = NULL;
 
 	q->key.v4 = *key;
 	qp->ecn = 0;
-	qp->peer = q->fqdir->max_dist ?
-		inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif) :
-		NULL;
+	if (q->fqdir->max_dist) {
+		rcu_read_lock();
+		p = inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif);
+		if (p && !refcount_inc_not_zero(&p->refcnt))
+			p = NULL;
+		rcu_read_unlock();
+	}
+	qp->peer = p;
 }
 
 static void ip4_frag_free(struct inet_frag_queue *q)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 297a9939c6e74beffc592dbdd7266281fe842440..d2086648dcf180375c8d7981dfb72f87e50957f6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -870,11 +870,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	}
 	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
 	vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
-	rcu_read_unlock();
 
 	net = dev_net(rt->dst.dev);
 	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif);
 	if (!peer) {
+		rcu_read_unlock();
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
 			  rt_nexthop(rt, ip_hdr(skb)->daddr));
 		return;
@@ -893,7 +893,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	 */
 	if (peer->n_redirects >= ip_rt_redirect_number) {
 		peer->rate_last = jiffies;
-		goto out_put_peer;
+		goto out_unlock;
 	}
 
 	/* Check for load limit; set rate_last to the latest sent
@@ -914,8 +914,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 					     &ip_hdr(skb)->saddr, inet_iif(skb),
 					     &ip_hdr(skb)->daddr, &gw);
 	}
-out_put_peer:
-	inet_putpeer(peer);
+out_unlock:
+	rcu_read_unlock();
 }
 
 static int ip_error(struct sk_buff *skb)
@@ -975,9 +975,9 @@ static int ip_error(struct sk_buff *skb)
 		break;
 	}
 
+	rcu_read_lock();
 	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
 			       l3mdev_master_ifindex(skb->dev));
-
 	send = true;
 	if (peer) {
 		now = jiffies;
@@ -989,8 +989,9 @@ static int ip_error(struct sk_buff *skb)
 			peer->rate_tokens -= ip_rt_error_cost;
 		else
 			send = false;
-		inet_putpeer(peer);
 	}
+	rcu_read_unlock();
+
 	if (send)
 		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 4593e3992c67b84e3a10f30be28762974094d21f..a6984a29fdb9dd972a11ca9f8d5e794c443bac6f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -222,10 +222,10 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
 		if (rt->rt6i_dst.plen < 128)
 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 
+		rcu_read_lock();
 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
 		res = inet_peer_xrlim_allow(peer, tmo);
-		if (peer)
-			inet_putpeer(peer);
+		rcu_read_unlock();
 	}
 	if (!res)
 		__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2cbcfe70654b5cd90c433a24c47ef5496c604d0d..06cab008b8277f1b6e56541e91fc92f999221ac5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -613,6 +613,7 @@ int ip6_forward(struct sk_buff *skb)
 		else
 			target = &hdr->daddr;
 
+		rcu_read_lock();
 		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr);
 
 		/* Limit redirects both by destination (here)
@@ -620,8 +621,7 @@ int ip6_forward(struct sk_buff *skb)
 		 */
 		if (inet_peer_xrlim_allow(peer, 1*HZ))
 			ndisc_send_redirect(skb, target);
-		if (peer)
-			inet_putpeer(peer);
+		rcu_read_unlock();
 	} else {
 		int addrtype = ipv6_addr_type(&hdr->saddr);
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f113554d13325453cd04ce4e5686d837943e96ff..d044c67019de6da1eb29dee875cf8cda30210ceb 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1731,10 +1731,12 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 			  "Redirect: destination is not a neighbour\n");
 		goto release;
 	}
+
+	rcu_read_lock();
 	peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr);
 	ret = inet_peer_xrlim_allow(peer, 1*HZ);
-	if (peer)
-		inet_putpeer(peer);
+	rcu_read_unlock();
+
 	if (!ret)
 		goto release;
 
-- 
2.47.1.613.gc27f4b7a9f-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next 2/4] inetpeer: remove create argument of inet_getpeer()
  2024-12-13 13:02 ` [PATCH net-next 2/4] inetpeer: remove create argument of inet_getpeer() Eric Dumazet
@ 2024-12-13 20:16   ` Simon Horman
  2024-12-13 20:47     ` Eric Dumazet
  2024-12-14 14:34   ` kernel test robot
  1 sibling, 1 reply; 11+ messages in thread
From: Simon Horman @ 2024-12-13 20:16 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, netdev,
	David Ahern, Kuniyuki Iwashima, eric.dumazet

On Fri, Dec 13, 2024 at 01:02:10PM +0000, Eric Dumazet wrote:
> All callers of inet_getpeer() want to create an inetpeer.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

...

> diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c

...

> @@ -189,10 +188,6 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
>  	if (p)
>  		return p;
>  
> -	/* If no writer did a change during our lookup, we can return early. */
> -	if (!create && !invalidated)
> -		return NULL;
> -

Hi Eric,

With this change invalidated is set but otherwise unused in this function,
so it can probably be removed.

Flagged by W=1 builds.

>  	/* retry an exact lookup, taking the lock before.
>  	 * At least, nodes should be hot in our cache.
>  	 */

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next 2/4] inetpeer: remove create argument of inet_getpeer()
  2024-12-13 20:16   ` Simon Horman
@ 2024-12-13 20:47     ` Eric Dumazet
  0 siblings, 0 replies; 11+ messages in thread
From: Eric Dumazet @ 2024-12-13 20:47 UTC (permalink / raw)
  To: Simon Horman
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, netdev,
	David Ahern, Kuniyuki Iwashima, eric.dumazet

On Fri, Dec 13, 2024 at 9:16 PM Simon Horman <horms@kernel.org> wrote:

> Hi Eric,
>
> With this change invalidated is set but otherwise unused in this function,
> so it can probably be removed.
>
> Flagged by W=1 builds.

Ah, right, I will fix this in V2.

Thanks Simon.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next 2/4] inetpeer: remove create argument of inet_getpeer()
  2024-12-13 13:02 ` [PATCH net-next 2/4] inetpeer: remove create argument of inet_getpeer() Eric Dumazet
  2024-12-13 20:16   ` Simon Horman
@ 2024-12-14 14:34   ` kernel test robot
  1 sibling, 0 replies; 11+ messages in thread
From: kernel test robot @ 2024-12-14 14:34 UTC (permalink / raw)
  To: Eric Dumazet, David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: llvm, oe-kbuild-all, netdev, Simon Horman, David Ahern,
	Kuniyuki Iwashima, eric.dumazet, Eric Dumazet

Hi Eric,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Eric-Dumazet/inetpeer-remove-create-argument-of-inet_getpeer_v-46/20241213-210500
base:   net-next/main
patch link:    https://lore.kernel.org/r/20241213130212.1783302-3-edumazet%40google.com
patch subject: [PATCH net-next 2/4] inetpeer: remove create argument of inet_getpeer()
config: i386-buildonly-randconfig-003-20241214 (https://download.01.org/0day-ci/archive/20241214/202412142229.7lFHEOun-lkp@intel.com/config)
compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241214/202412142229.7lFHEOun-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202412142229.7lFHEOun-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from net/ipv4/inetpeer.c:19:
   In file included from include/linux/mm.h:2223:
   include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     518 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
>> net/ipv4/inetpeer.c:177:6: warning: variable 'invalidated' set but not used [-Wunused-but-set-variable]
     177 |         int invalidated;
         |             ^
   2 warnings generated.


vim +/invalidated +177 net/ipv4/inetpeer.c

^1da177e4c3f41 Linus Torvalds   2005-04-16  170  
c0efc887dcadbd David S. Miller  2012-06-09  171  struct inet_peer *inet_getpeer(struct inet_peer_base *base,
4a6cb3d13bf1f8 Eric Dumazet     2024-12-13  172  			       const struct inetpeer_addr *daddr)
^1da177e4c3f41 Linus Torvalds   2005-04-16  173  {
b145425f269a17 Eric Dumazet     2017-07-17  174  	struct inet_peer *p, *gc_stack[PEER_MAX_GC];
b145425f269a17 Eric Dumazet     2017-07-17  175  	struct rb_node **pp, *parent;
b145425f269a17 Eric Dumazet     2017-07-17  176  	unsigned int gc_cnt, seq;
b145425f269a17 Eric Dumazet     2017-07-17 @177  	int invalidated;
^1da177e4c3f41 Linus Torvalds   2005-04-16  178  
4b9d9be839fdb7 Eric Dumazet     2011-06-08  179  	/* Attempt a lockless lookup first.
aa1039e73cc2cf Eric Dumazet     2010-06-15  180  	 * Because of a concurrent writer, we might not find an existing entry.
aa1039e73cc2cf Eric Dumazet     2010-06-15  181  	 */
7b46ac4e77f322 David S. Miller  2011-03-08  182  	rcu_read_lock();
b145425f269a17 Eric Dumazet     2017-07-17  183  	seq = read_seqbegin(&base->lock);
b145425f269a17 Eric Dumazet     2017-07-17  184  	p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
b145425f269a17 Eric Dumazet     2017-07-17  185  	invalidated = read_seqretry(&base->lock, seq);
7b46ac4e77f322 David S. Miller  2011-03-08  186  	rcu_read_unlock();
^1da177e4c3f41 Linus Torvalds   2005-04-16  187  
4b9d9be839fdb7 Eric Dumazet     2011-06-08  188  	if (p)
aa1039e73cc2cf Eric Dumazet     2010-06-15  189  		return p;
aa1039e73cc2cf Eric Dumazet     2010-06-15  190  
aa1039e73cc2cf Eric Dumazet     2010-06-15  191  	/* retry an exact lookup, taking the lock before.
aa1039e73cc2cf Eric Dumazet     2010-06-15  192  	 * At least, nodes should be hot in our cache.
aa1039e73cc2cf Eric Dumazet     2010-06-15  193  	 */
b145425f269a17 Eric Dumazet     2017-07-17  194  	parent = NULL;
65e8354ec13a45 Eric Dumazet     2011-03-04  195  	write_seqlock_bh(&base->lock);
b145425f269a17 Eric Dumazet     2017-07-17  196  
b145425f269a17 Eric Dumazet     2017-07-17  197  	gc_cnt = 0;
b145425f269a17 Eric Dumazet     2017-07-17  198  	p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp);
4a6cb3d13bf1f8 Eric Dumazet     2024-12-13  199  	if (!p) {
b145425f269a17 Eric Dumazet     2017-07-17  200  		p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
aa1039e73cc2cf Eric Dumazet     2010-06-15  201  		if (p) {
b534ecf1cd26f0 David S. Miller  2010-11-30  202  			p->daddr = *daddr;
b6a37e5e25414d Eric Dumazet     2018-04-09  203  			p->dtime = (__u32)jiffies;
1cc9a98b59ba92 Reshetova, Elena 2017-06-30  204  			refcount_set(&p->refcnt, 2);
aa1039e73cc2cf Eric Dumazet     2010-06-15  205  			atomic_set(&p->rid, 0);
144001bddcb4db David S. Miller  2011-01-27  206  			p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
92d8682926342d David S. Miller  2011-02-04  207  			p->rate_tokens = 0;
c09551c6ff7fe1 Lorenzo Bianconi 2019-02-06  208  			p->n_redirects = 0;
bc9259a8bae9e8 Nicolas Dichtel  2012-09-27  209  			/* 60*HZ is arbitrary, but chosen enough high so that the first
bc9259a8bae9e8 Nicolas Dichtel  2012-09-27  210  			 * calculation of tokens is at its maximum.
bc9259a8bae9e8 Nicolas Dichtel  2012-09-27  211  			 */
bc9259a8bae9e8 Nicolas Dichtel  2012-09-27  212  			p->rate_last = jiffies - 60*HZ;
^1da177e4c3f41 Linus Torvalds   2005-04-16  213  
b145425f269a17 Eric Dumazet     2017-07-17  214  			rb_link_node(&p->rb_node, parent, pp);
b145425f269a17 Eric Dumazet     2017-07-17  215  			rb_insert_color(&p->rb_node, &base->rb_root);
98158f5a853caf David S. Miller  2010-11-30  216  			base->total++;
aa1039e73cc2cf Eric Dumazet     2010-06-15  217  		}
b145425f269a17 Eric Dumazet     2017-07-17  218  	}
b145425f269a17 Eric Dumazet     2017-07-17  219  	if (gc_cnt)
b145425f269a17 Eric Dumazet     2017-07-17  220  		inet_peer_gc(base, gc_stack, gc_cnt);
65e8354ec13a45 Eric Dumazet     2011-03-04  221  	write_sequnlock_bh(&base->lock);
^1da177e4c3f41 Linus Torvalds   2005-04-16  222  
^1da177e4c3f41 Linus Torvalds   2005-04-16  223  	return p;
^1da177e4c3f41 Linus Torvalds   2005-04-16  224  }
b3419363808f24 David S. Miller  2010-11-30  225  EXPORT_SYMBOL_GPL(inet_getpeer);
98158f5a853caf David S. Miller  2010-11-30  226  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next 3/4] inetpeer: update inetpeer timestamp in inet_getpeer()
  2024-12-13 13:02 ` [PATCH net-next 3/4] inetpeer: update inetpeer timestamp in inet_getpeer() Eric Dumazet
@ 2024-12-15 15:34   ` Ido Schimmel
  0 siblings, 0 replies; 11+ messages in thread
From: Ido Schimmel @ 2024-12-15 15:34 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, netdev,
	Simon Horman, David Ahern, Kuniyuki Iwashima, eric.dumazet

On Fri, Dec 13, 2024 at 01:02:11PM +0000, Eric Dumazet wrote:
> inet_putpeer() will be removed in the following patch,
> because we will no longer use refcounts.
> 
> Update inetpeer timetamp (p->dtime) at lookup time.

Given you're planning a v2: s/timetamp/timestamp/

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next 4/4] inetpeer: do not get a refcount in inet_getpeer()
  2024-12-13 13:02 ` [PATCH net-next 4/4] inetpeer: do not get a refcount " Eric Dumazet
@ 2024-12-15 15:48   ` Ido Schimmel
  2024-12-15 17:42     ` Eric Dumazet
  0 siblings, 1 reply; 11+ messages in thread
From: Ido Schimmel @ 2024-12-15 15:48 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, netdev,
	Simon Horman, David Ahern, Kuniyuki Iwashima, eric.dumazet

On Fri, Dec 13, 2024 at 01:02:12PM +0000, Eric Dumazet wrote:
> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
> index 5eeb9f569a706cf2766d74bcf1a667c8930804f2..7a1b1af2edcae0b0648ef3c3411b4ef36e6d9b14 100644
> --- a/net/ipv4/icmp.c
> +++ b/net/ipv4/icmp.c
> @@ -322,11 +322,11 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
>  		goto out;
>  
>  	vif = l3mdev_master_ifindex(dst->dev);
> +	rcu_read_lock();
>  	peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif);
>  	rc = inet_peer_xrlim_allow(peer,
>  				   READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
> -	if (peer)
> -		inet_putpeer(peer);
> +	rcu_read_unlock();
>  out:
>  	if (!rc)
>  		__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);

Maybe convert l3mdev_master_ifindex() to l3mdev_master_ifindex_rcu() and
move it into the RCU critical section?

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 7a1b1af2edca..094084b61bff 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -312,7 +312,6 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
        struct dst_entry *dst = &rt->dst;
        struct inet_peer *peer;
        bool rc = true;
-       int vif;
 
        if (!apply_ratelimit)
                return true;
@@ -321,9 +320,9 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
        if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
                goto out;
 
-       vif = l3mdev_master_ifindex(dst->dev);
        rcu_read_lock();
-       peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif);
+       peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr,
+                              l3mdev_master_ifindex_rcu(dst->dev));
        rc = inet_peer_xrlim_allow(peer,
                                   READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
        rcu_read_unlock();

[...]

> @@ -975,9 +975,9 @@ static int ip_error(struct sk_buff *skb)
>  		break;
>  	}
>  
> +	rcu_read_lock();
>  	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
>  			       l3mdev_master_ifindex(skb->dev));
> -
>  	send = true;
>  	if (peer) {
>  		now = jiffies;

And here?

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d2086648dcf1..9f9d4e6ea1b9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -977,7 +977,7 @@ static int ip_error(struct sk_buff *skb)
 
        rcu_read_lock();
        peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
-                              l3mdev_master_ifindex(skb->dev));
+                              l3mdev_master_ifindex_rcu(skb->dev));
        send = true;
        if (peer) {
                now = jiffies;

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next 4/4] inetpeer: do not get a refcount in inet_getpeer()
  2024-12-15 15:48   ` Ido Schimmel
@ 2024-12-15 17:42     ` Eric Dumazet
  0 siblings, 0 replies; 11+ messages in thread
From: Eric Dumazet @ 2024-12-15 17:42 UTC (permalink / raw)
  To: Ido Schimmel
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, netdev,
	Simon Horman, David Ahern, Kuniyuki Iwashima, eric.dumazet

On Sun, Dec 15, 2024 at 4:48 PM Ido Schimmel <idosch@idosch.org> wrote:
>
> On Fri, Dec 13, 2024 at 01:02:12PM +0000, Eric Dumazet wrote:
> > diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
> > index 5eeb9f569a706cf2766d74bcf1a667c8930804f2..7a1b1af2edcae0b0648ef3c3411b4ef36e6d9b14 100644
> > --- a/net/ipv4/icmp.c
> > +++ b/net/ipv4/icmp.c
> > @@ -322,11 +322,11 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
> >               goto out;
> >
> >       vif = l3mdev_master_ifindex(dst->dev);
> > +     rcu_read_lock();
> >       peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif);
> >       rc = inet_peer_xrlim_allow(peer,
> >                                  READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
> > -     if (peer)
> > -             inet_putpeer(peer);
> > +     rcu_read_unlock();
> >  out:
> >       if (!rc)
> >               __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
>
> Maybe convert l3mdev_master_ifindex() to l3mdev_master_ifindex_rcu() and
> move it into the RCU critical section?
>
> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
> index 7a1b1af2edca..094084b61bff 100644
> --- a/net/ipv4/icmp.c
> +++ b/net/ipv4/icmp.c
> @@ -312,7 +312,6 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
>         struct dst_entry *dst = &rt->dst;
>         struct inet_peer *peer;
>         bool rc = true;
> -       int vif;
>
>         if (!apply_ratelimit)
>                 return true;
> @@ -321,9 +320,9 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
>         if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
>                 goto out;
>
> -       vif = l3mdev_master_ifindex(dst->dev);
>         rcu_read_lock();
> -       peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif);
> +       peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr,
> +                              l3mdev_master_ifindex_rcu(dst->dev));
>         rc = inet_peer_xrlim_allow(peer,
>                                    READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
>         rcu_read_unlock();
>
> [...]
>
> > @@ -975,9 +975,9 @@ static int ip_error(struct sk_buff *skb)
> >               break;
> >       }
> >
> > +     rcu_read_lock();
> >       peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
> >                              l3mdev_master_ifindex(skb->dev));
> > -
> >       send = true;
> >       if (peer) {
> >               now = jiffies;
>
> And here?
>
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index d2086648dcf1..9f9d4e6ea1b9 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -977,7 +977,7 @@ static int ip_error(struct sk_buff *skb)
>
>         rcu_read_lock();
>         peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
> -                              l3mdev_master_ifindex(skb->dev));
> +                              l3mdev_master_ifindex_rcu(skb->dev));
>         send = true;
>         if (peer) {
>                 now = jiffies;

Good ideas, I will add this to V2, thanks !

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2024-12-15 17:42 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-12-13 13:02 [PATCH net-next 0/4] inetpeer: reduce false sharing and atomic operations Eric Dumazet
2024-12-13 13:02 ` [PATCH net-next 1/4] inetpeer: remove create argument of inet_getpeer_v[46]() Eric Dumazet
2024-12-13 13:02 ` [PATCH net-next 2/4] inetpeer: remove create argument of inet_getpeer() Eric Dumazet
2024-12-13 20:16   ` Simon Horman
2024-12-13 20:47     ` Eric Dumazet
2024-12-14 14:34   ` kernel test robot
2024-12-13 13:02 ` [PATCH net-next 3/4] inetpeer: update inetpeer timestamp in inet_getpeer() Eric Dumazet
2024-12-15 15:34   ` Ido Schimmel
2024-12-13 13:02 ` [PATCH net-next 4/4] inetpeer: do not get a refcount " Eric Dumazet
2024-12-15 15:48   ` Ido Schimmel
2024-12-15 17:42     ` Eric Dumazet

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).