netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH] reuseport: compute the ehash only if needed
@ 2017-12-12 13:09 Paolo Abeni
  2017-12-13 20:08 ` David Miller
  0 siblings, 1 reply; 6+ messages in thread
From: Paolo Abeni @ 2017-12-12 13:09 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Craig Gallek, Eric Dumazet

When a reuseport socket group is using a BPF filter to distribute
the packets among the sockets, we don't need to compute any hash
value, but the current reuseport_select_sock() requires the
caller to compute such hash in advance.

This patch reworks reuseport_select_sock() to compute the hash value
only if needed - missing or failing BPF filter. Since different
hash functions have different argument types - ipv4 addresses vs ipv6
ones - to avoid over-complicate the interface, reuseport_select_sock()
is now a macro.

Additionally, the sk_reuseport test is move inside reuseport_select_sock,
to avoid some code duplication.

Overall this gives small but measurable performance improvement
under UDP flood while using SO_REUSEPORT + BPF.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/sock_reuseport.h | 32 ++++++++++++++++++++++++++++----
 net/core/sock_reuseport.c    | 34 +++++++++++++++-------------------
 net/ipv4/inet_hashtables.c   | 28 ++++++++++------------------
 net/ipv4/udp.c               | 30 ++++++++++++------------------
 net/ipv6/inet6_hashtables.c  | 28 ++++++++++------------------
 net/ipv6/udp.c               | 31 ++++++++++++-------------------
 6 files changed, 87 insertions(+), 96 deletions(-)

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 0054b3a9b923..e7d71d22dca7 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -16,13 +16,37 @@ struct sock_reuseport {
 	struct sock		*socks[0];	/* array of sock pointers */
 };
 
+struct reuseport_info {
+	struct sock_reuseport *reuse;
+	struct sock *sk;
+	u16 socks;
+};
+
 extern int reuseport_alloc(struct sock *sk);
 extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
 extern void reuseport_detach_sock(struct sock *sk);
-extern struct sock *reuseport_select_sock(struct sock *sk,
-					  u32 hash,
-					  struct sk_buff *skb,
-					  int hdr_len);
+bool __reuseport_get_info(struct sock *sk, struct sk_buff *skb, int hdr_len,
+			  struct reuseport_info *info);
+static inline struct sock *__reuseport_select_sock(struct reuseport_info *info,
+						   u32 hash)
+{
+	return info->reuse->socks[reciprocal_scale(hash, info->socks)];
+}
+
+#define reuseport_select_sock(sk, skb, net, hlen, fn, saddr, sport, daddr, dport) \
+({									      \
+	struct reuseport_info info;					      \
+	info.sk = NULL;							      \
+	if (sk->sk_reuseport) {						      \
+		rcu_read_lock();					      \
+		if (__reuseport_get_info(sk, skb, hlen, &info) && !info.sk)   \
+			info.sk = __reuseport_select_sock(&info,	      \
+					 fn(net, daddr, hnum, saddr, sport)); \
+		rcu_read_unlock();					      \
+	}								      \
+	info.sk;							      \
+})
+
 extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
 					      struct bpf_prog *prog);
 
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index c5bb52bc73a1..8d66e66239a2 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -201,31 +201,30 @@ static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
 }
 
 /**
- *  reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
+ *  __reuseport_get_info - Retrieve information for reuseport socket selection
  *  @sk: First socket in the group.
- *  @hash: When no BPF filter is available, use this hash to select.
  *  @skb: skb to run through BPF filter.
  *  @hdr_len: BPF filter expects skb data pointer at payload data.  If
  *    the skb does not yet point at the payload, this parameter represents
  *    how far the pointer needs to advance to reach the payload.
- *  Returns a socket that should receive the packet (or NULL on error).
+ *  @info: reuseport information, filled only if return value is true
+ *  Returns true if @sk is a reuseport socket, and fill @info accordingly.
+ *  if @info.sk is NULL, the caller must retrieve the selected reuseport socket
+ *  calling __reuseport_select_sock(). The caller must hold the RCU lock.
  */
-struct sock *reuseport_select_sock(struct sock *sk,
-				   u32 hash,
-				   struct sk_buff *skb,
-				   int hdr_len)
+bool __reuseport_get_info(struct sock *sk, struct sk_buff *skb, int hdr_len,
+			  struct reuseport_info *info)
 {
 	struct sock_reuseport *reuse;
 	struct bpf_prog *prog;
-	struct sock *sk2 = NULL;
 	u16 socks;
 
-	rcu_read_lock();
+	info->sk = NULL;
 	reuse = rcu_dereference(sk->sk_reuseport_cb);
 
 	/* if memory allocation failed or add call is not yet complete */
 	if (!reuse)
-		goto out;
+		return false;
 
 	prog = rcu_dereference(reuse->prog);
 	socks = READ_ONCE(reuse->num_socks);
@@ -234,18 +233,15 @@ struct sock *reuseport_select_sock(struct sock *sk,
 		smp_rmb();
 
 		if (prog && skb)
-			sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
+			info->sk = run_bpf(reuse, socks, prog, skb, hdr_len);
 
-		/* no bpf or invalid bpf result: fall back to hash usage */
-		if (!sk2)
-			sk2 = reuse->socks[reciprocal_scale(hash, socks)];
+		info->reuse = reuse;
+		info->socks = socks;
+		return true;
 	}
-
-out:
-	rcu_read_unlock();
-	return sk2;
+	return false;
 }
-EXPORT_SYMBOL(reuseport_select_sock);
+EXPORT_SYMBOL(__reuseport_get_info);
 
 struct bpf_prog *
 reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index f6f58108b4c5..eed48aab05f5 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -273,21 +273,17 @@ static struct sock *inet_lhash2_lookup(struct net *net,
 	struct inet_connection_sock *icsk;
 	struct sock *sk, *result = NULL;
 	int score, hiscore = 0;
-	u32 phash = 0;
 
 	inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
 		sk = (struct sock *)icsk;
 		score = compute_score(sk, net, hnum, daddr,
 				      dif, sdif, exact_dif);
 		if (score > hiscore) {
-			if (sk->sk_reuseport) {
-				phash = inet_ehashfn(net, daddr, hnum,
-						     saddr, sport);
-				result = reuseport_select_sock(sk, phash,
-							       skb, doff);
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net, doff,
+						       inet_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			result = sk;
 			hiscore = score;
 		}
@@ -310,7 +306,6 @@ struct sock *__inet_lookup_listener(struct net *net,
 	struct sock *sk, *result = NULL;
 	int score, hiscore = 0;
 	unsigned int hash2;
-	u32 phash = 0;
 
 	if (ilb->count <= 10 || !hashinfo->lhash2)
 		goto port_lookup;
@@ -346,14 +341,11 @@ struct sock *__inet_lookup_listener(struct net *net,
 		score = compute_score(sk, net, hnum, daddr,
 				      dif, sdif, exact_dif);
 		if (score > hiscore) {
-			if (sk->sk_reuseport) {
-				phash = inet_ehashfn(net, daddr, hnum,
-						     saddr, sport);
-				result = reuseport_select_sock(sk, phash,
-							       skb, doff);
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net, doff,
+						       inet_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			result = sk;
 			hiscore = score;
 		}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e9c0d1e1772e..8072755bb5fc 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -440,7 +440,6 @@ static struct sock *udp4_lib_lookup2(struct net *net,
 {
 	struct sock *sk, *result;
 	int score, badness;
-	u32 hash = 0;
 
 	result = NULL;
 	badness = 0;
@@ -448,14 +447,12 @@ static struct sock *udp4_lib_lookup2(struct net *net,
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif, exact_dif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
-				hash = udp_ehashfn(net, daddr, hnum,
-						   saddr, sport);
-				result = reuseport_select_sock(sk, hash, skb,
-							sizeof(struct udphdr));
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net,
+						       sizeof(struct udphdr),
+						       udp_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			badness = score;
 			result = sk;
 		}
@@ -476,7 +473,6 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
 	bool exact_dif = udp_lib_exact_dif_match(net, skb);
 	int score, badness;
-	u32 hash = 0;
 
 	if (hslot->count > 10) {
 		hash2 = ipv4_portaddr_hash(net, daddr, hnum);
@@ -513,14 +509,12 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif, exact_dif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
-				hash = udp_ehashfn(net, daddr, hnum,
-						   saddr, sport);
-				result = reuseport_select_sock(sk, hash, skb,
-							sizeof(struct udphdr));
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net,
+						       sizeof(struct udphdr),
+						       udp_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			result = sk;
 			badness = score;
 		}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 2febe26de6a1..f6167e647672 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -136,21 +136,17 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
 	struct inet_connection_sock *icsk;
 	struct sock *sk, *result = NULL;
 	int score, hiscore = 0;
-	u32 phash = 0;
 
 	inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
 		sk = (struct sock *)icsk;
 		score = compute_score(sk, net, hnum, daddr, dif, sdif,
 				      exact_dif);
 		if (score > hiscore) {
-			if (sk->sk_reuseport) {
-				phash = inet6_ehashfn(net, daddr, hnum,
-						      saddr, sport);
-				result = reuseport_select_sock(sk, phash,
-							       skb, doff);
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net, doff,
+						       inet6_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			result = sk;
 			hiscore = score;
 		}
@@ -173,7 +169,6 @@ struct sock *inet6_lookup_listener(struct net *net,
 	struct sock *sk, *result = NULL;
 	int score, hiscore = 0;
 	unsigned int hash2;
-	u32 phash = 0;
 
 	if (ilb->count <= 10 || !hashinfo->lhash2)
 		goto port_lookup;
@@ -208,14 +203,11 @@ struct sock *inet6_lookup_listener(struct net *net,
 	sk_for_each(sk, &ilb->head) {
 		score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
 		if (score > hiscore) {
-			if (sk->sk_reuseport) {
-				phash = inet6_ehashfn(net, daddr, hnum,
-						      saddr, sport);
-				result = reuseport_select_sock(sk, phash,
-							       skb, doff);
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net, doff,
+						       inet6_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			result = sk;
 			hiscore = score;
 		}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index eecf9f0faf29..936c2a5c7147 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -169,7 +169,6 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 {
 	struct sock *sk, *result;
 	int score, badness;
-	u32 hash = 0;
 
 	result = NULL;
 	badness = -1;
@@ -177,15 +176,12 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif, exact_dif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
-				hash = udp6_ehashfn(net, daddr, hnum,
-						    saddr, sport);
-
-				result = reuseport_select_sock(sk, hash, skb,
-							sizeof(struct udphdr));
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net,
+						       sizeof(struct udphdr),
+						       udp6_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			result = sk;
 			badness = score;
 		}
@@ -206,7 +202,6 @@ struct sock *__udp6_lib_lookup(struct net *net,
 	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
 	bool exact_dif = udp6_lib_exact_dif_match(net, skb);
 	int score, badness;
-	u32 hash = 0;
 
 	if (hslot->count > 10) {
 		hash2 = ipv6_portaddr_hash(net, daddr, hnum);
@@ -244,14 +239,12 @@ struct sock *__udp6_lib_lookup(struct net *net,
 		score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
 				      sdif, exact_dif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
-				hash = udp6_ehashfn(net, daddr, hnum,
-						    saddr, sport);
-				result = reuseport_select_sock(sk, hash, skb,
-							sizeof(struct udphdr));
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net,
+						       sizeof(struct udphdr),
+						       udp6_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			result = sk;
 			badness = score;
 		}
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 6+ messages in thread
* Re: [RFC PATCH] reuseport: compute the ehash only if needed
@ 2017-12-12 17:44 Craig Gallek
  2017-12-12 18:25 ` Paolo Abeni
  0 siblings, 1 reply; 6+ messages in thread
From: Craig Gallek @ 2017-12-12 17:44 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: netdev, David S. Miller, Eric Dumazet

On Tue, Dec 12, 2017 at 8:09 AM, Paolo Abeni <pabeni@redhat.com> wrote:
> When a reuseport socket group is using a BPF filter to distribute
> the packets among the sockets, we don't need to compute any hash
> value, but the current reuseport_select_sock() requires the
> caller to compute such hash in advance.
>
> This patch reworks reuseport_select_sock() to compute the hash value
> only if needed - missing or failing BPF filter. Since different
> hash functions have different argument types - ipv4 addresses vs ipv6
> ones - to avoid over-complicate the interface, reuseport_select_sock()
> is now a macro.
Purely subjective, but I think a slightly more complicated function
signature for reuseport_select_sock (and reuseport_select_sock6?)
would look a little better than this macro.  It would avoid needing to
expose the reuseport_info struct and would keep the rcu semantics
entirely within the function call (the fast-path memory access
semantics here are already non-trivial...)

> Additionally, the sk_reuseport test is move inside reuseport_select_sock,
> to avoid some code duplication.
>
> Overall this gives small but measurable performance improvement
> under UDP flood while using SO_REUSEPORT + BPF.
Exciting, do you have some specific numbers here?  I'd be interested
in knowing what kinds of loads you end up seeing improvements for.

> Signed-off-by: Paolo Abeni <pabeni@redhat.com>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-12-14 13:42 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-12-12 13:09 [RFC PATCH] reuseport: compute the ehash only if needed Paolo Abeni
2017-12-13 20:08 ` David Miller
2017-12-14  8:29   ` Paolo Abeni
2017-12-14 13:41     ` David Miller
  -- strict thread matches above, loose matches on Subject: below --
2017-12-12 17:44 Craig Gallek
2017-12-12 18:25 ` Paolo Abeni

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).