netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup
@ 2016-01-05 20:08 Craig Gallek
  2016-01-05 21:58 ` Eric Dumazet
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Craig Gallek @ 2016-01-05 20:08 UTC (permalink / raw)
  To: netdev, David Miller

From: Craig Gallek <kraig@google.com>

This socket-lookup path did not pass along the skb in question
in my original BPF-based socket selection patch.  The skb in the
udpN_lib_lookup2 path can be used for BPF-based socket selection just
like it is in the 'traditional' udpN_lib_lookup path.

udpN_lib_lookup2 kicks in when there are greater than 10 sockets in
the same hlist slot.  Coincidentally, I chose 10 sockets per
reuseport group in my functional test, so the lookup2 path was not
excersised. This adds an additional set of tests with 20 sockets.

Fixes: 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF")
Fixes: 3ca8e4029969 ("soreuseport: BPF selection functional test")
Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Craig Gallek <kraig@google.com>
---
 net/ipv4/udp.c                              | 10 +++---
 net/ipv6/udp.c                              | 10 +++---
 tools/testing/selftests/net/reuseport_bpf.c | 47 +++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 835378365f25..3a66731e3af6 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -493,7 +493,8 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
 static struct sock *udp4_lib_lookup2(struct net *net,
 		__be32 saddr, __be16 sport,
 		__be32 daddr, unsigned int hnum, int dif,
-		struct udp_hslot *hslot2, unsigned int slot2)
+		struct udp_hslot *hslot2, unsigned int slot2,
+		struct sk_buff *skb)
 {
 	struct sock *sk, *result;
 	struct hlist_nulls_node *node;
@@ -514,7 +515,8 @@ begin:
 				struct sock *sk2;
 				hash = udp_ehashfn(net, daddr, hnum,
 						   saddr, sport);
-				sk2 = reuseport_select_sock(sk, hash, NULL, 0);
+				sk2 = reuseport_select_sock(sk, hash, skb,
+							    sizeof(struct udphdr));
 				if (sk2) {
 					result = sk2;
 					goto found;
@@ -573,7 +575,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 
 		result = udp4_lib_lookup2(net, saddr, sport,
 					  daddr, hnum, dif,
-					  hslot2, slot2);
+					  hslot2, slot2, skb);
 		if (!result) {
 			hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
 			slot2 = hash2 & udptable->mask;
@@ -583,7 +585,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 
 			result = udp4_lib_lookup2(net, saddr, sport,
 						  htonl(INADDR_ANY), hnum, dif,
-						  hslot2, slot2);
+						  hslot2, slot2, skb);
 		}
 		rcu_read_unlock();
 		return result;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 56fcb55fda31..5d2c2afffe7b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -251,7 +251,8 @@ static inline int compute_score2(struct sock *sk, struct net *net,
 static struct sock *udp6_lib_lookup2(struct net *net,
 		const struct in6_addr *saddr, __be16 sport,
 		const struct in6_addr *daddr, unsigned int hnum, int dif,
-		struct udp_hslot *hslot2, unsigned int slot2)
+		struct udp_hslot *hslot2, unsigned int slot2,
+		struct sk_buff *skb)
 {
 	struct sock *sk, *result;
 	struct hlist_nulls_node *node;
@@ -272,7 +273,8 @@ begin:
 				struct sock *sk2;
 				hash = udp6_ehashfn(net, daddr, hnum,
 						    saddr, sport);
-				sk2 = reuseport_select_sock(sk, hash, NULL, 0);
+				sk2 = reuseport_select_sock(sk, hash, skb,
+							    sizeof(struct udphdr));
 				if (sk2) {
 					result = sk2;
 					goto found;
@@ -331,7 +333,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
 
 		result = udp6_lib_lookup2(net, saddr, sport,
 					  daddr, hnum, dif,
-					  hslot2, slot2);
+					  hslot2, slot2, skb);
 		if (!result) {
 			hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
 			slot2 = hash2 & udptable->mask;
@@ -341,7 +343,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
 
 			result = udp6_lib_lookup2(net, saddr, sport,
 						  &in6addr_any, hnum, dif,
-						  hslot2, slot2);
+						  hslot2, slot2, skb);
 		}
 		rcu_read_unlock();
 		return result;
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
index 74ff09988958..bec1b5dd2530 100644
--- a/tools/testing/selftests/net/reuseport_bpf.c
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -123,6 +123,8 @@ static void attach_ebpf(int fd, uint16_t mod)
 	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
 			sizeof(bpf_fd)))
 		error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
+
+	close(bpf_fd);
 }
 
 static void attach_cbpf(int fd, uint16_t mod)
@@ -396,6 +398,9 @@ static void test_filter_without_bind(void)
 int main(void)
 {
 	fprintf(stderr, "---- IPv4 UDP ----\n");
+	/* NOTE: UDP socket lookups traverse a different code path when there
+	 * are > 10 sockets in a group.  Run the bpf test through both paths.
+	 */
 	test_reuseport_ebpf((struct test_params) {
 		.recv_family = AF_INET,
 		.send_family = AF_INET,
@@ -403,6 +408,13 @@ int main(void)
 		.recv_socks = 10,
 		.recv_port = 8000,
 		.send_port_min = 9000});
+	test_reuseport_ebpf((struct test_params) {
+		.recv_family = AF_INET,
+		.send_family = AF_INET,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 20,
+		.recv_port = 8000,
+		.send_port_min = 9000});
 	test_reuseport_cbpf((struct test_params) {
 		.recv_family = AF_INET,
 		.send_family = AF_INET,
@@ -410,6 +422,13 @@ int main(void)
 		.recv_socks = 10,
 		.recv_port = 8001,
 		.send_port_min = 9020});
+	test_reuseport_cbpf((struct test_params) {
+		.recv_family = AF_INET,
+		.send_family = AF_INET,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 20,
+		.recv_port = 8001,
+		.send_port_min = 9020});
 	test_extra_filter((struct test_params) {
 		.recv_family = AF_INET,
 		.protocol = SOCK_DGRAM,
@@ -427,6 +446,13 @@ int main(void)
 		.recv_socks = 10,
 		.recv_port = 8003,
 		.send_port_min = 9040});
+	test_reuseport_ebpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET6,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 20,
+		.recv_port = 8003,
+		.send_port_min = 9040});
 	test_reuseport_cbpf((struct test_params) {
 		.recv_family = AF_INET6,
 		.send_family = AF_INET6,
@@ -434,6 +460,13 @@ int main(void)
 		.recv_socks = 10,
 		.recv_port = 8004,
 		.send_port_min = 9060});
+	test_reuseport_cbpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET6,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 20,
+		.recv_port = 8004,
+		.send_port_min = 9060});
 	test_extra_filter((struct test_params) {
 		.recv_family = AF_INET6,
 		.protocol = SOCK_DGRAM,
@@ -448,6 +481,13 @@ int main(void)
 		.recv_family = AF_INET6,
 		.send_family = AF_INET,
 		.protocol = SOCK_DGRAM,
+		.recv_socks = 20,
+		.recv_port = 8006,
+		.send_port_min = 9080});
+	test_reuseport_ebpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET,
+		.protocol = SOCK_DGRAM,
 		.recv_socks = 10,
 		.recv_port = 8006,
 		.send_port_min = 9080});
@@ -458,6 +498,13 @@ int main(void)
 		.recv_socks = 10,
 		.recv_port = 8007,
 		.send_port_min = 9100});
+	test_reuseport_cbpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 20,
+		.recv_port = 8007,
+		.send_port_min = 9100});
 
 
 	test_filter_without_bind();
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup
  2016-01-05 20:08 [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup Craig Gallek
@ 2016-01-05 21:58 ` Eric Dumazet
  2016-01-05 22:08 ` [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT Eric Dumazet
  2016-01-06  6:28 ` [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup David Miller
  2 siblings, 0 replies; 10+ messages in thread
From: Eric Dumazet @ 2016-01-05 21:58 UTC (permalink / raw)
  To: Craig Gallek; +Cc: netdev, David Miller

On Tue, 2016-01-05 at 15:08 -0500, Craig Gallek wrote:
> From: Craig Gallek <kraig@google.com>
> 
> This socket-lookup path did not pass along the skb in question
> in my original BPF-based socket selection patch.  The skb in the
> udpN_lib_lookup2 path can be used for BPF-based socket selection just
> like it is in the 'traditional' udpN_lib_lookup path.
> 
> udpN_lib_lookup2 kicks in when there are greater than 10 sockets in
> the same hlist slot.  Coincidentally, I chose 10 sockets per
> reuseport group in my functional test, so the lookup2 path was not
> excersised. This adds an additional set of tests with 20 sockets.
> 
> Fixes: 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF")
> Fixes: 3ca8e4029969 ("soreuseport: BPF selection functional test")
> Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
> Signed-off-by: Craig Gallek <kraig@google.com>
> ---

Great. I'll send the patch to fix udp_dump_one() so that we can remove
the test against skb in fast path.

Acked-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT
  2016-01-05 20:08 [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup Craig Gallek
  2016-01-05 21:58 ` Eric Dumazet
@ 2016-01-05 22:08 ` Eric Dumazet
  2016-01-05 22:22   ` Eric Dumazet
  2016-01-06  6:28 ` [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup David Miller
  2 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2016-01-05 22:08 UTC (permalink / raw)
  To: Craig Gallek; +Cc: netdev, David Miller

From: Eric Dumazet <edumazet@google.com>

udp_dump_one() uses __udp4_lib_lookup() & __udp6_lib_lookup()
which cannot properly handle the provided cookie when SO_REUSEPORT
is used, as many sockets share the same 4-tuple

Instead, let's use the provided 64bit cookie to uniquely identify
the socket.

This will allow us to remove a check against skb being NULL in
reuseport_select_sock(), as we no longer share __udp4_lib_lookup() &
__udp6_lib_lookup() with packet processing path.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/udp_diag.c |   44 ++++++++++++++++++------------------------
 1 file changed, 19 insertions(+), 25 deletions(-)

diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index df1966f3b6ec..09d0e1cffeb4 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -35,36 +35,30 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
 			const struct nlmsghdr *nlh,
 			const struct inet_diag_req_v2 *req)
 {
-	int err = -EINVAL;
-	struct sock *sk;
+	struct sock *aux, *sk = NULL;
 	struct sk_buff *rep;
 	struct net *net = sock_net(in_skb->sk);
-
-	if (req->sdiag_family == AF_INET)
-		sk = __udp4_lib_lookup(net,
-				req->id.idiag_src[0], req->id.idiag_sport,
-				req->id.idiag_dst[0], req->id.idiag_dport,
-				req->id.idiag_if, tbl, NULL);
-#if IS_ENABLED(CONFIG_IPV6)
-	else if (req->sdiag_family == AF_INET6)
-		sk = __udp6_lib_lookup(net,
-				(struct in6_addr *)req->id.idiag_src,
-				req->id.idiag_sport,
-				(struct in6_addr *)req->id.idiag_dst,
-				req->id.idiag_dport,
-				req->id.idiag_if, tbl, NULL);
-#endif
-	else
-		goto out_nosk;
-
-	err = -ENOENT;
+	unsigned short hnum = ntohs(req->id.idiag_dport);
+	unsigned int slot = udp_hashfn(net, hnum, tbl->mask);
+	struct udp_hslot *hslot = &tbl->hash[slot];
+	struct hlist_nulls_node *node;
+	int err = -ENOENT;
+
+	spin_lock_bh(&hslot->lock);
+	sk_nulls_for_each(aux, node, &hslot->head) {
+		if (net_eq(sock_net(aux), net) &&
+		    !sock_diag_check_cookie(aux, req->id.idiag_cookie) &&
+		    (req->sdiag_family == AF_UNSPEC ||
+		     req->sdiag_family == aux->sk_family)) {
+			sk = aux;
+			sock_hold(sk);
+			break;
+		}
+	}
+	spin_unlock_bh(&hslot->lock);
 	if (!sk)
 		goto out_nosk;
 
-	err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
-	if (err)
-		goto out;
-
 	err = -ENOMEM;
 	rep = nlmsg_new(sizeof(struct inet_diag_msg) +
 			sizeof(struct inet_diag_meminfo) + 64,

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT
  2016-01-05 22:08 ` [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT Eric Dumazet
@ 2016-01-05 22:22   ` Eric Dumazet
  2016-01-05 22:26     ` Craig Gallek
  2016-01-05 22:33     ` [PATCH v2 " Eric Dumazet
  0 siblings, 2 replies; 10+ messages in thread
From: Eric Dumazet @ 2016-01-05 22:22 UTC (permalink / raw)
  To: Craig Gallek; +Cc: netdev, David Miller

On Tue, 2016-01-05 at 14:08 -0800, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> udp_dump_one() uses __udp4_lib_lookup() & __udp6_lib_lookup()
> which cannot properly handle the provided cookie when SO_REUSEPORT
> is used, as many sockets share the same 4-tuple
> 
> Instead, let's use the provided 64bit cookie to uniquely identify
> the socket.
> 
> This will allow us to remove a check against skb being NULL in
> reuseport_select_sock(), as we no longer share __udp4_lib_lookup() &
> __udp6_lib_lookup() with packet processing path.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---

I'll send a v2, removing the EXPORT_SYMBOL(__udp4_lib_lookup) and add a
static qualifier. (Same for v6)

Also, INET_UDP_DIAG can depends on INET_DIAG instead of "depends on
INET_DIAG && (IPV6 || IPV6=n)"

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT
  2016-01-05 22:22   ` Eric Dumazet
@ 2016-01-05 22:26     ` Craig Gallek
  2016-01-05 22:46       ` Eric Dumazet
  2016-01-05 22:33     ` [PATCH v2 " Eric Dumazet
  1 sibling, 1 reply; 10+ messages in thread
From: Craig Gallek @ 2016-01-05 22:26 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, David Miller

On Tue, Jan 5, 2016 at 5:22 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2016-01-05 at 14:08 -0800, Eric Dumazet wrote:
>> From: Eric Dumazet <edumazet@google.com>
>>
>> udp_dump_one() uses __udp4_lib_lookup() & __udp6_lib_lookup()
>> which cannot properly handle the provided cookie when SO_REUSEPORT
>> is used, as many sockets share the same 4-tuple
>>
>> Instead, let's use the provided 64bit cookie to uniquely identify
>> the socket.
>>
>> This will allow us to remove a check against skb being NULL in
>> reuseport_select_sock(), as we no longer share __udp4_lib_lookup() &
>> __udp6_lib_lookup() with packet processing path.
>>
>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> ---
>
> I'll send a v2, removing the EXPORT_SYMBOL(__udp4_lib_lookup) and add a
> static qualifier. (Same for v6)
>
> Also, INET_UDP_DIAG can depends on INET_DIAG instead of "depends on
> INET_DIAG && (IPV6 || IPV6=n)"

Thanks Eric! Still reading through this, but I noticed a couple other
paths as well.  Anything that comes through the exported
udp4_lib_lookup.  Currently xt_TPROXY.c and xt_socket.c.  Though,
these could probably be changed to pass through the skb.  That may be
worth addressing in a separate patch, though...

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2 net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT
  2016-01-05 22:22   ` Eric Dumazet
  2016-01-05 22:26     ` Craig Gallek
@ 2016-01-05 22:33     ` Eric Dumazet
  2016-01-05 22:56       ` Craig Gallek
  1 sibling, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2016-01-05 22:33 UTC (permalink / raw)
  To: Craig Gallek, David Miller; +Cc: netdev

From: Eric Dumazet <edumazet@google.com>

udp_dump_one() uses __udp4_lib_lookup() & __udp6_lib_lookup()
which cannot properly handle the provided cookie when SO_REUSEPORT
is used, as many sockets share the same 4-tuple

Instead, let's use the provided 64bit cookie to uniquely identify
the socket.

This will allow us to remove a check against skb being NULL in
reuseport_select_sock(), as we no longer share __udp4_lib_lookup() &
__udp6_lib_lookup() with packet processing path.

We no longer need to export these two functions, and udp_diag IPv6
dependency can be removed.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/udp.h   |    8 -------
 net/ipv4/Kconfig    |    2 -
 net/ipv4/udp.c      |    8 +++----
 net/ipv4/udp_diag.c |   44 ++++++++++++++++++------------------------
 net/ipv6/udp.c      |    3 --
 5 files changed, 25 insertions(+), 40 deletions(-)

diff --git a/include/net/udp.h b/include/net/udp.h
index 2842541e28e7..1e3bcadc2169 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -256,18 +256,10 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		       int (*push_pending_frames)(struct sock *));
 struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
 			     __be32 daddr, __be16 dport, int dif);
-struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
-			       __be32 daddr, __be16 dport, int dif,
-			       struct udp_table *tbl, struct sk_buff *skb);
 struct sock *udp6_lib_lookup(struct net *net,
 			     const struct in6_addr *saddr, __be16 sport,
 			     const struct in6_addr *daddr, __be16 dport,
 			     int dif);
-struct sock *__udp6_lib_lookup(struct net *net,
-			       const struct in6_addr *saddr, __be16 sport,
-			       const struct in6_addr *daddr, __be16 dport,
-			       int dif, struct udp_table *tbl,
-			       struct sk_buff *skb);
 
 /*
  * 	SNMP statistics for UDP and UDP-Lite
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index c22920525e5d..ff9b81ff733a 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -430,7 +430,7 @@ config INET_TCP_DIAG
 
 config INET_UDP_DIAG
 	tristate "UDP: socket monitoring interface"
-	depends on INET_DIAG && (IPV6 || IPV6=n)
+	depends on INET_DIAG
 	default n
 	---help---
 	  Support for UDP socket monitoring interface used by the ss tool.
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 835378365f25..6f58a9d86f41 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -551,9 +551,10 @@ found:
 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
  * harder than this. -DaveM
  */
-struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
-		__be16 sport, __be32 daddr, __be16 dport,
-		int dif, struct udp_table *udptable, struct sk_buff *skb)
+static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
+				      __be16 sport, __be32 daddr, __be16 dport,
+				      int dif, struct udp_table *udptable,
+				      struct sk_buff *skb)
 {
 	struct sock *sk, *result;
 	struct hlist_nulls_node *node;
@@ -638,7 +639,6 @@ found:
 	rcu_read_unlock();
 	return result;
 }
-EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
 
 static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
 						 __be16 sport, __be16 dport,
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index df1966f3b6ec..09d0e1cffeb4 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -35,36 +35,30 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
 			const struct nlmsghdr *nlh,
 			const struct inet_diag_req_v2 *req)
 {
-	int err = -EINVAL;
-	struct sock *sk;
+	struct sock *aux, *sk = NULL;
 	struct sk_buff *rep;
 	struct net *net = sock_net(in_skb->sk);
-
-	if (req->sdiag_family == AF_INET)
-		sk = __udp4_lib_lookup(net,
-				req->id.idiag_src[0], req->id.idiag_sport,
-				req->id.idiag_dst[0], req->id.idiag_dport,
-				req->id.idiag_if, tbl, NULL);
-#if IS_ENABLED(CONFIG_IPV6)
-	else if (req->sdiag_family == AF_INET6)
-		sk = __udp6_lib_lookup(net,
-				(struct in6_addr *)req->id.idiag_src,
-				req->id.idiag_sport,
-				(struct in6_addr *)req->id.idiag_dst,
-				req->id.idiag_dport,
-				req->id.idiag_if, tbl, NULL);
-#endif
-	else
-		goto out_nosk;
-
-	err = -ENOENT;
+	unsigned short hnum = ntohs(req->id.idiag_dport);
+	unsigned int slot = udp_hashfn(net, hnum, tbl->mask);
+	struct udp_hslot *hslot = &tbl->hash[slot];
+	struct hlist_nulls_node *node;
+	int err = -ENOENT;
+
+	spin_lock_bh(&hslot->lock);
+	sk_nulls_for_each(aux, node, &hslot->head) {
+		if (net_eq(sock_net(aux), net) &&
+		    !sock_diag_check_cookie(aux, req->id.idiag_cookie) &&
+		    (req->sdiag_family == AF_UNSPEC ||
+		     req->sdiag_family == aux->sk_family)) {
+			sk = aux;
+			sock_hold(sk);
+			break;
+		}
+	}
+	spin_unlock_bh(&hslot->lock);
 	if (!sk)
 		goto out_nosk;
 
-	err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
-	if (err)
-		goto out;
-
 	err = -ENOMEM;
 	rep = nlmsg_new(sizeof(struct inet_diag_msg) +
 			sizeof(struct inet_diag_meminfo) + 64,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 56fcb55fda31..83bfd64cf37b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -307,7 +307,7 @@ found:
 	return result;
 }
 
-struct sock *__udp6_lib_lookup(struct net *net,
+static struct sock *__udp6_lib_lookup(struct net *net,
 				      const struct in6_addr *saddr, __be16 sport,
 				      const struct in6_addr *daddr, __be16 dport,
 				      int dif, struct udp_table *udptable,
@@ -395,7 +395,6 @@ found:
 	rcu_read_unlock();
 	return result;
 }
-EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
 
 static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
 					  __be16 sport, __be16 dport,

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT
  2016-01-05 22:26     ` Craig Gallek
@ 2016-01-05 22:46       ` Eric Dumazet
  0 siblings, 0 replies; 10+ messages in thread
From: Eric Dumazet @ 2016-01-05 22:46 UTC (permalink / raw)
  To: Craig Gallek; +Cc: netdev, David Miller

On Tue, 2016-01-05 at 17:26 -0500, Craig Gallek wrote:

> Thanks Eric! Still reading through this, but I noticed a couple other
> paths as well.  Anything that comes through the exported
> udp4_lib_lookup.  Currently xt_TPROXY.c and xt_socket.c.  Though,
> these could probably be changed to pass through the skb.  That may be
> worth addressing in a separate patch, though...

Right, this will be done on a separate patch.

Note that I did not change reuseport_select_sock() yet ;)

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT
  2016-01-05 22:33     ` [PATCH v2 " Eric Dumazet
@ 2016-01-05 22:56       ` Craig Gallek
  2016-01-06  0:08         ` Eric Dumazet
  0 siblings, 1 reply; 10+ messages in thread
From: Craig Gallek @ 2016-01-05 22:56 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev

On Tue, Jan 5, 2016 at 5:33 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> From: Eric Dumazet <edumazet@google.com>
> +       unsigned short hnum = ntohs(req->id.idiag_dport);
> +       unsigned int slot = udp_hashfn(net, hnum, tbl->mask);
> +       struct udp_hslot *hslot = &tbl->hash[slot];
> +       struct hlist_nulls_node *node;
> +       int err = -ENOENT;
> +
> +       spin_lock_bh(&hslot->lock);
> +       sk_nulls_for_each(aux, node, &hslot->head) {
> +               if (net_eq(sock_net(aux), net) &&
> +                   !sock_diag_check_cookie(aux, req->id.idiag_cookie) &&
> +                   (req->sdiag_family == AF_UNSPEC ||
> +                    req->sdiag_family == aux->sk_family)) {
> +                       sk = aux;
> +                       sock_hold(sk);
> +                       break;
> +               }
> +       }
> +       spin_unlock_bh(&hslot->lock);
>         if (!sk)
>                 goto out_nosk;
>
> -       err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
> -       if (err)
> -               goto out;
> -
sock_diag_check_cookie will return successfully if the cookie in the
request is INET_DIAG_NOCOOKIE before even considering the socket.  I
think this could cause this loop to prematurely terminate.

Also, previously this would return ESTALE on a cookie error, now it
returns ENOENT.  Not sure if this is a big deal or not, though...

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT
  2016-01-05 22:56       ` Craig Gallek
@ 2016-01-06  0:08         ` Eric Dumazet
  0 siblings, 0 replies; 10+ messages in thread
From: Eric Dumazet @ 2016-01-06  0:08 UTC (permalink / raw)
  To: Craig Gallek; +Cc: David Miller, netdev

On Tue, 2016-01-05 at 17:56 -0500, Craig Gallek wrote:
> On Tue, Jan 5, 2016 at 5:33 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > From: Eric Dumazet <edumazet@google.com>
> > +       unsigned short hnum = ntohs(req->id.idiag_dport);
> > +       unsigned int slot = udp_hashfn(net, hnum, tbl->mask);
> > +       struct udp_hslot *hslot = &tbl->hash[slot];
> > +       struct hlist_nulls_node *node;
> > +       int err = -ENOENT;
> > +
> > +       spin_lock_bh(&hslot->lock);
> > +       sk_nulls_for_each(aux, node, &hslot->head) {
> > +               if (net_eq(sock_net(aux), net) &&
> > +                   !sock_diag_check_cookie(aux, req->id.idiag_cookie) &&
> > +                   (req->sdiag_family == AF_UNSPEC ||
> > +                    req->sdiag_family == aux->sk_family)) {
> > +                       sk = aux;
> > +                       sock_hold(sk);
> > +                       break;
> > +               }
> > +       }
> > +       spin_unlock_bh(&hslot->lock);
> >         if (!sk)
> >                 goto out_nosk;
> >
> > -       err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
> > -       if (err)
> > -               goto out;
> > -
> sock_diag_check_cookie will return successfully if the cookie in the
> request is INET_DIAG_NOCOOKIE before even considering the socket.  I
> think this could cause this loop to prematurely terminate.

Hmmm...

I never considered passing a INET_DIAG_NOCOOKIE there.

(Actually I am not sure if this interface is used. It is not in
iproute2)


> 
> Also, previously this would return ESTALE on a cookie error, now it
> returns ENOENT.  Not sure if this is a big deal or not, though...

Probably not a big deal.

I'll send a V3, thanks.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup
  2016-01-05 20:08 [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup Craig Gallek
  2016-01-05 21:58 ` Eric Dumazet
  2016-01-05 22:08 ` [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT Eric Dumazet
@ 2016-01-06  6:28 ` David Miller
  2 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2016-01-06  6:28 UTC (permalink / raw)
  To: kraigatgoog; +Cc: netdev

From: Craig Gallek <kraigatgoog@gmail.com>
Date: Tue,  5 Jan 2016 15:08:07 -0500

> From: Craig Gallek <kraig@google.com>
> 
> This socket-lookup path did not pass along the skb in question
> in my original BPF-based socket selection patch.  The skb in the
> udpN_lib_lookup2 path can be used for BPF-based socket selection just
> like it is in the 'traditional' udpN_lib_lookup path.
> 
> udpN_lib_lookup2 kicks in when there are greater than 10 sockets in
> the same hlist slot.  Coincidentally, I chose 10 sockets per
> reuseport group in my functional test, so the lookup2 path was not
> excersised. This adds an additional set of tests with 20 sockets.
> 
> Fixes: 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF")
> Fixes: 3ca8e4029969 ("soreuseport: BPF selection functional test")
> Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
> Signed-off-by: Craig Gallek <kraig@google.com>

Applied, thanks Craig.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2016-01-06  6:28 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-01-05 20:08 [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup Craig Gallek
2016-01-05 21:58 ` Eric Dumazet
2016-01-05 22:08 ` [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT Eric Dumazet
2016-01-05 22:22   ` Eric Dumazet
2016-01-05 22:26     ` Craig Gallek
2016-01-05 22:46       ` Eric Dumazet
2016-01-05 22:33     ` [PATCH v2 " Eric Dumazet
2016-01-05 22:56       ` Craig Gallek
2016-01-06  0:08         ` Eric Dumazet
2016-01-06  6:28 ` [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).