* [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup
@ 2016-01-05 20:08 Craig Gallek
2016-01-05 21:58 ` Eric Dumazet
` (2 more replies)
0 siblings, 3 replies; 10+ messages in thread
From: Craig Gallek @ 2016-01-05 20:08 UTC (permalink / raw)
To: netdev, David Miller
From: Craig Gallek <kraig@google.com>
This socket-lookup path did not pass along the skb in question
in my original BPF-based socket selection patch. The skb in the
udpN_lib_lookup2 path can be used for BPF-based socket selection just
like it is in the 'traditional' udpN_lib_lookup path.
udpN_lib_lookup2 kicks in when there are greater than 10 sockets in
the same hlist slot. Coincidentally, I chose 10 sockets per
reuseport group in my functional test, so the lookup2 path was not
excersised. This adds an additional set of tests with 20 sockets.
Fixes: 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF")
Fixes: 3ca8e4029969 ("soreuseport: BPF selection functional test")
Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Craig Gallek <kraig@google.com>
---
net/ipv4/udp.c | 10 +++---
net/ipv6/udp.c | 10 +++---
tools/testing/selftests/net/reuseport_bpf.c | 47 +++++++++++++++++++++++++++++
3 files changed, 59 insertions(+), 8 deletions(-)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 835378365f25..3a66731e3af6 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -493,7 +493,8 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum, int dif,
- struct udp_hslot *hslot2, unsigned int slot2)
+ struct udp_hslot *hslot2, unsigned int slot2,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
struct hlist_nulls_node *node;
@@ -514,7 +515,8 @@ begin:
struct sock *sk2;
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
- sk2 = reuseport_select_sock(sk, hash, NULL, 0);
+ sk2 = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
if (sk2) {
result = sk2;
goto found;
@@ -573,7 +575,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
if (!result) {
hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
@@ -583,7 +585,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport,
htonl(INADDR_ANY), hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
}
rcu_read_unlock();
return result;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 56fcb55fda31..5d2c2afffe7b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -251,7 +251,8 @@ static inline int compute_score2(struct sock *sk, struct net *net,
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned int hnum, int dif,
- struct udp_hslot *hslot2, unsigned int slot2)
+ struct udp_hslot *hslot2, unsigned int slot2,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
struct hlist_nulls_node *node;
@@ -272,7 +273,8 @@ begin:
struct sock *sk2;
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
- sk2 = reuseport_select_sock(sk, hash, NULL, 0);
+ sk2 = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
if (sk2) {
result = sk2;
goto found;
@@ -331,7 +333,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
if (!result) {
hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
@@ -341,7 +343,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
result = udp6_lib_lookup2(net, saddr, sport,
&in6addr_any, hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
}
rcu_read_unlock();
return result;
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
index 74ff09988958..bec1b5dd2530 100644
--- a/tools/testing/selftests/net/reuseport_bpf.c
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -123,6 +123,8 @@ static void attach_ebpf(int fd, uint16_t mod)
if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
sizeof(bpf_fd)))
error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
+
+ close(bpf_fd);
}
static void attach_cbpf(int fd, uint16_t mod)
@@ -396,6 +398,9 @@ static void test_filter_without_bind(void)
int main(void)
{
fprintf(stderr, "---- IPv4 UDP ----\n");
+ /* NOTE: UDP socket lookups traverse a different code path when there
+ * are > 10 sockets in a group. Run the bpf test through both paths.
+ */
test_reuseport_ebpf((struct test_params) {
.recv_family = AF_INET,
.send_family = AF_INET,
@@ -403,6 +408,13 @@ int main(void)
.recv_socks = 10,
.recv_port = 8000,
.send_port_min = 9000});
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8000,
+ .send_port_min = 9000});
test_reuseport_cbpf((struct test_params) {
.recv_family = AF_INET,
.send_family = AF_INET,
@@ -410,6 +422,13 @@ int main(void)
.recv_socks = 10,
.recv_port = 8001,
.send_port_min = 9020});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8001,
+ .send_port_min = 9020});
test_extra_filter((struct test_params) {
.recv_family = AF_INET,
.protocol = SOCK_DGRAM,
@@ -427,6 +446,13 @@ int main(void)
.recv_socks = 10,
.recv_port = 8003,
.send_port_min = 9040});
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8003,
+ .send_port_min = 9040});
test_reuseport_cbpf((struct test_params) {
.recv_family = AF_INET6,
.send_family = AF_INET6,
@@ -434,6 +460,13 @@ int main(void)
.recv_socks = 10,
.recv_port = 8004,
.send_port_min = 9060});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8004,
+ .send_port_min = 9060});
test_extra_filter((struct test_params) {
.recv_family = AF_INET6,
.protocol = SOCK_DGRAM,
@@ -448,6 +481,13 @@ int main(void)
.recv_family = AF_INET6,
.send_family = AF_INET,
.protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8006,
+ .send_port_min = 9080});
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
.recv_socks = 10,
.recv_port = 8006,
.send_port_min = 9080});
@@ -458,6 +498,13 @@ int main(void)
.recv_socks = 10,
.recv_port = 8007,
.send_port_min = 9100});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8007,
+ .send_port_min = 9100});
test_filter_without_bind();
--
2.6.0.rc2.230.g3dd15c0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup
2016-01-05 20:08 [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup Craig Gallek
@ 2016-01-05 21:58 ` Eric Dumazet
2016-01-05 22:08 ` [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT Eric Dumazet
2016-01-06 6:28 ` [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup David Miller
2 siblings, 0 replies; 10+ messages in thread
From: Eric Dumazet @ 2016-01-05 21:58 UTC (permalink / raw)
To: Craig Gallek; +Cc: netdev, David Miller
On Tue, 2016-01-05 at 15:08 -0500, Craig Gallek wrote:
> From: Craig Gallek <kraig@google.com>
>
> This socket-lookup path did not pass along the skb in question
> in my original BPF-based socket selection patch. The skb in the
> udpN_lib_lookup2 path can be used for BPF-based socket selection just
> like it is in the 'traditional' udpN_lib_lookup path.
>
> udpN_lib_lookup2 kicks in when there are greater than 10 sockets in
> the same hlist slot. Coincidentally, I chose 10 sockets per
> reuseport group in my functional test, so the lookup2 path was not
> excersised. This adds an additional set of tests with 20 sockets.
>
> Fixes: 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF")
> Fixes: 3ca8e4029969 ("soreuseport: BPF selection functional test")
> Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
> Signed-off-by: Craig Gallek <kraig@google.com>
> ---
Great. I'll send the patch to fix udp_dump_one() so that we can remove
the test against skb in fast path.
Acked-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT
2016-01-05 20:08 [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup Craig Gallek
2016-01-05 21:58 ` Eric Dumazet
@ 2016-01-05 22:08 ` Eric Dumazet
2016-01-05 22:22 ` Eric Dumazet
2016-01-06 6:28 ` [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup David Miller
2 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2016-01-05 22:08 UTC (permalink / raw)
To: Craig Gallek; +Cc: netdev, David Miller
From: Eric Dumazet <edumazet@google.com>
udp_dump_one() uses __udp4_lib_lookup() & __udp6_lib_lookup()
which cannot properly handle the provided cookie when SO_REUSEPORT
is used, as many sockets share the same 4-tuple
Instead, let's use the provided 64bit cookie to uniquely identify
the socket.
This will allow us to remove a check against skb being NULL in
reuseport_select_sock(), as we no longer share __udp4_lib_lookup() &
__udp6_lib_lookup() with packet processing path.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv4/udp_diag.c | 44 ++++++++++++++++++------------------------
1 file changed, 19 insertions(+), 25 deletions(-)
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index df1966f3b6ec..09d0e1cffeb4 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -35,36 +35,30 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
const struct nlmsghdr *nlh,
const struct inet_diag_req_v2 *req)
{
- int err = -EINVAL;
- struct sock *sk;
+ struct sock *aux, *sk = NULL;
struct sk_buff *rep;
struct net *net = sock_net(in_skb->sk);
-
- if (req->sdiag_family == AF_INET)
- sk = __udp4_lib_lookup(net,
- req->id.idiag_src[0], req->id.idiag_sport,
- req->id.idiag_dst[0], req->id.idiag_dport,
- req->id.idiag_if, tbl, NULL);
-#if IS_ENABLED(CONFIG_IPV6)
- else if (req->sdiag_family == AF_INET6)
- sk = __udp6_lib_lookup(net,
- (struct in6_addr *)req->id.idiag_src,
- req->id.idiag_sport,
- (struct in6_addr *)req->id.idiag_dst,
- req->id.idiag_dport,
- req->id.idiag_if, tbl, NULL);
-#endif
- else
- goto out_nosk;
-
- err = -ENOENT;
+ unsigned short hnum = ntohs(req->id.idiag_dport);
+ unsigned int slot = udp_hashfn(net, hnum, tbl->mask);
+ struct udp_hslot *hslot = &tbl->hash[slot];
+ struct hlist_nulls_node *node;
+ int err = -ENOENT;
+
+ spin_lock_bh(&hslot->lock);
+ sk_nulls_for_each(aux, node, &hslot->head) {
+ if (net_eq(sock_net(aux), net) &&
+ !sock_diag_check_cookie(aux, req->id.idiag_cookie) &&
+ (req->sdiag_family == AF_UNSPEC ||
+ req->sdiag_family == aux->sk_family)) {
+ sk = aux;
+ sock_hold(sk);
+ break;
+ }
+ }
+ spin_unlock_bh(&hslot->lock);
if (!sk)
goto out_nosk;
- err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
- if (err)
- goto out;
-
err = -ENOMEM;
rep = nlmsg_new(sizeof(struct inet_diag_msg) +
sizeof(struct inet_diag_meminfo) + 64,
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT
2016-01-05 22:08 ` [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT Eric Dumazet
@ 2016-01-05 22:22 ` Eric Dumazet
2016-01-05 22:26 ` Craig Gallek
2016-01-05 22:33 ` [PATCH v2 " Eric Dumazet
0 siblings, 2 replies; 10+ messages in thread
From: Eric Dumazet @ 2016-01-05 22:22 UTC (permalink / raw)
To: Craig Gallek; +Cc: netdev, David Miller
On Tue, 2016-01-05 at 14:08 -0800, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> udp_dump_one() uses __udp4_lib_lookup() & __udp6_lib_lookup()
> which cannot properly handle the provided cookie when SO_REUSEPORT
> is used, as many sockets share the same 4-tuple
>
> Instead, let's use the provided 64bit cookie to uniquely identify
> the socket.
>
> This will allow us to remove a check against skb being NULL in
> reuseport_select_sock(), as we no longer share __udp4_lib_lookup() &
> __udp6_lib_lookup() with packet processing path.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
I'll send a v2, removing the EXPORT_SYMBOL(__udp4_lib_lookup) and add a
static qualifier. (Same for v6)
Also, INET_UDP_DIAG can depends on INET_DIAG instead of "depends on
INET_DIAG && (IPV6 || IPV6=n)"
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT
2016-01-05 22:22 ` Eric Dumazet
@ 2016-01-05 22:26 ` Craig Gallek
2016-01-05 22:46 ` Eric Dumazet
2016-01-05 22:33 ` [PATCH v2 " Eric Dumazet
1 sibling, 1 reply; 10+ messages in thread
From: Craig Gallek @ 2016-01-05 22:26 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev, David Miller
On Tue, Jan 5, 2016 at 5:22 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2016-01-05 at 14:08 -0800, Eric Dumazet wrote:
>> From: Eric Dumazet <edumazet@google.com>
>>
>> udp_dump_one() uses __udp4_lib_lookup() & __udp6_lib_lookup()
>> which cannot properly handle the provided cookie when SO_REUSEPORT
>> is used, as many sockets share the same 4-tuple
>>
>> Instead, let's use the provided 64bit cookie to uniquely identify
>> the socket.
>>
>> This will allow us to remove a check against skb being NULL in
>> reuseport_select_sock(), as we no longer share __udp4_lib_lookup() &
>> __udp6_lib_lookup() with packet processing path.
>>
>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> ---
>
> I'll send a v2, removing the EXPORT_SYMBOL(__udp4_lib_lookup) and add a
> static qualifier. (Same for v6)
>
> Also, INET_UDP_DIAG can depends on INET_DIAG instead of "depends on
> INET_DIAG && (IPV6 || IPV6=n)"
Thanks Eric! Still reading through this, but I noticed a couple other
paths as well. Anything that comes through the exported
udp4_lib_lookup. Currently xt_TPROXY.c and xt_socket.c. Though,
these could probably be changed to pass through the skb. That may be
worth addressing in a separate patch, though...
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT
2016-01-05 22:26 ` Craig Gallek
@ 2016-01-05 22:46 ` Eric Dumazet
0 siblings, 0 replies; 10+ messages in thread
From: Eric Dumazet @ 2016-01-05 22:46 UTC (permalink / raw)
To: Craig Gallek; +Cc: netdev, David Miller
On Tue, 2016-01-05 at 17:26 -0500, Craig Gallek wrote:
> Thanks Eric! Still reading through this, but I noticed a couple other
> paths as well. Anything that comes through the exported
> udp4_lib_lookup. Currently xt_TPROXY.c and xt_socket.c. Though,
> these could probably be changed to pass through the skb. That may be
> worth addressing in a separate patch, though...
Right, this will be done on a separate patch.
Note that I did not change reuseport_select_sock() yet ;)
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2 net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT
2016-01-05 22:22 ` Eric Dumazet
2016-01-05 22:26 ` Craig Gallek
@ 2016-01-05 22:33 ` Eric Dumazet
2016-01-05 22:56 ` Craig Gallek
1 sibling, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2016-01-05 22:33 UTC (permalink / raw)
To: Craig Gallek, David Miller; +Cc: netdev
From: Eric Dumazet <edumazet@google.com>
udp_dump_one() uses __udp4_lib_lookup() & __udp6_lib_lookup()
which cannot properly handle the provided cookie when SO_REUSEPORT
is used, as many sockets share the same 4-tuple
Instead, let's use the provided 64bit cookie to uniquely identify
the socket.
This will allow us to remove a check against skb being NULL in
reuseport_select_sock(), as we no longer share __udp4_lib_lookup() &
__udp6_lib_lookup() with packet processing path.
We no longer need to export these two functions, and udp_diag IPv6
dependency can be removed.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/udp.h | 8 -------
net/ipv4/Kconfig | 2 -
net/ipv4/udp.c | 8 +++----
net/ipv4/udp_diag.c | 44 ++++++++++++++++++------------------------
net/ipv6/udp.c | 3 --
5 files changed, 25 insertions(+), 40 deletions(-)
diff --git a/include/net/udp.h b/include/net/udp.h
index 2842541e28e7..1e3bcadc2169 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -256,18 +256,10 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
int (*push_pending_frames)(struct sock *));
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif);
-struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
- __be32 daddr, __be16 dport, int dif,
- struct udp_table *tbl, struct sk_buff *skb);
struct sock *udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif);
-struct sock *__udp6_lib_lookup(struct net *net,
- const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr, __be16 dport,
- int dif, struct udp_table *tbl,
- struct sk_buff *skb);
/*
* SNMP statistics for UDP and UDP-Lite
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index c22920525e5d..ff9b81ff733a 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -430,7 +430,7 @@ config INET_TCP_DIAG
config INET_UDP_DIAG
tristate "UDP: socket monitoring interface"
- depends on INET_DIAG && (IPV6 || IPV6=n)
+ depends on INET_DIAG
default n
---help---
Support for UDP socket monitoring interface used by the ss tool.
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 835378365f25..6f58a9d86f41 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -551,9 +551,10 @@ found:
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
-struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
- __be16 sport, __be32 daddr, __be16 dport,
- int dif, struct udp_table *udptable, struct sk_buff *skb)
+static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
+ __be16 sport, __be32 daddr, __be16 dport,
+ int dif, struct udp_table *udptable,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
struct hlist_nulls_node *node;
@@ -638,7 +639,6 @@ found:
rcu_read_unlock();
return result;
}
-EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport,
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index df1966f3b6ec..09d0e1cffeb4 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -35,36 +35,30 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
const struct nlmsghdr *nlh,
const struct inet_diag_req_v2 *req)
{
- int err = -EINVAL;
- struct sock *sk;
+ struct sock *aux, *sk = NULL;
struct sk_buff *rep;
struct net *net = sock_net(in_skb->sk);
-
- if (req->sdiag_family == AF_INET)
- sk = __udp4_lib_lookup(net,
- req->id.idiag_src[0], req->id.idiag_sport,
- req->id.idiag_dst[0], req->id.idiag_dport,
- req->id.idiag_if, tbl, NULL);
-#if IS_ENABLED(CONFIG_IPV6)
- else if (req->sdiag_family == AF_INET6)
- sk = __udp6_lib_lookup(net,
- (struct in6_addr *)req->id.idiag_src,
- req->id.idiag_sport,
- (struct in6_addr *)req->id.idiag_dst,
- req->id.idiag_dport,
- req->id.idiag_if, tbl, NULL);
-#endif
- else
- goto out_nosk;
-
- err = -ENOENT;
+ unsigned short hnum = ntohs(req->id.idiag_dport);
+ unsigned int slot = udp_hashfn(net, hnum, tbl->mask);
+ struct udp_hslot *hslot = &tbl->hash[slot];
+ struct hlist_nulls_node *node;
+ int err = -ENOENT;
+
+ spin_lock_bh(&hslot->lock);
+ sk_nulls_for_each(aux, node, &hslot->head) {
+ if (net_eq(sock_net(aux), net) &&
+ !sock_diag_check_cookie(aux, req->id.idiag_cookie) &&
+ (req->sdiag_family == AF_UNSPEC ||
+ req->sdiag_family == aux->sk_family)) {
+ sk = aux;
+ sock_hold(sk);
+ break;
+ }
+ }
+ spin_unlock_bh(&hslot->lock);
if (!sk)
goto out_nosk;
- err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
- if (err)
- goto out;
-
err = -ENOMEM;
rep = nlmsg_new(sizeof(struct inet_diag_msg) +
sizeof(struct inet_diag_meminfo) + 64,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 56fcb55fda31..83bfd64cf37b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -307,7 +307,7 @@ found:
return result;
}
-struct sock *__udp6_lib_lookup(struct net *net,
+static struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif, struct udp_table *udptable,
@@ -395,7 +395,6 @@ found:
rcu_read_unlock();
return result;
}
-EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport,
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v2 net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT
2016-01-05 22:33 ` [PATCH v2 " Eric Dumazet
@ 2016-01-05 22:56 ` Craig Gallek
2016-01-06 0:08 ` Eric Dumazet
0 siblings, 1 reply; 10+ messages in thread
From: Craig Gallek @ 2016-01-05 22:56 UTC (permalink / raw)
To: Eric Dumazet; +Cc: David Miller, netdev
On Tue, Jan 5, 2016 at 5:33 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> From: Eric Dumazet <edumazet@google.com>
> + unsigned short hnum = ntohs(req->id.idiag_dport);
> + unsigned int slot = udp_hashfn(net, hnum, tbl->mask);
> + struct udp_hslot *hslot = &tbl->hash[slot];
> + struct hlist_nulls_node *node;
> + int err = -ENOENT;
> +
> + spin_lock_bh(&hslot->lock);
> + sk_nulls_for_each(aux, node, &hslot->head) {
> + if (net_eq(sock_net(aux), net) &&
> + !sock_diag_check_cookie(aux, req->id.idiag_cookie) &&
> + (req->sdiag_family == AF_UNSPEC ||
> + req->sdiag_family == aux->sk_family)) {
> + sk = aux;
> + sock_hold(sk);
> + break;
> + }
> + }
> + spin_unlock_bh(&hslot->lock);
> if (!sk)
> goto out_nosk;
>
> - err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
> - if (err)
> - goto out;
> -
sock_diag_check_cookie will return successfully if the cookie in the
request is INET_DIAG_NOCOOKIE before even considering the socket. I
think this could cause this loop to prematurely terminate.
Also, previously this would return ESTALE on a cookie error, now it
returns ENOENT. Not sure if this is a big deal or not, though...
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT
2016-01-05 22:56 ` Craig Gallek
@ 2016-01-06 0:08 ` Eric Dumazet
0 siblings, 0 replies; 10+ messages in thread
From: Eric Dumazet @ 2016-01-06 0:08 UTC (permalink / raw)
To: Craig Gallek; +Cc: David Miller, netdev
On Tue, 2016-01-05 at 17:56 -0500, Craig Gallek wrote:
> On Tue, Jan 5, 2016 at 5:33 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > From: Eric Dumazet <edumazet@google.com>
> > + unsigned short hnum = ntohs(req->id.idiag_dport);
> > + unsigned int slot = udp_hashfn(net, hnum, tbl->mask);
> > + struct udp_hslot *hslot = &tbl->hash[slot];
> > + struct hlist_nulls_node *node;
> > + int err = -ENOENT;
> > +
> > + spin_lock_bh(&hslot->lock);
> > + sk_nulls_for_each(aux, node, &hslot->head) {
> > + if (net_eq(sock_net(aux), net) &&
> > + !sock_diag_check_cookie(aux, req->id.idiag_cookie) &&
> > + (req->sdiag_family == AF_UNSPEC ||
> > + req->sdiag_family == aux->sk_family)) {
> > + sk = aux;
> > + sock_hold(sk);
> > + break;
> > + }
> > + }
> > + spin_unlock_bh(&hslot->lock);
> > if (!sk)
> > goto out_nosk;
> >
> > - err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
> > - if (err)
> > - goto out;
> > -
> sock_diag_check_cookie will return successfully if the cookie in the
> request is INET_DIAG_NOCOOKIE before even considering the socket. I
> think this could cause this loop to prematurely terminate.
Hmmm...
I never considered passing a INET_DIAG_NOCOOKIE there.
(Actually I am not sure if this interface is used. It is not in
iproute2)
>
> Also, previously this would return ESTALE on a cookie error, now it
> returns ENOENT. Not sure if this is a big deal or not, though...
Probably not a big deal.
I'll send a V3, thanks.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup
2016-01-05 20:08 [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup Craig Gallek
2016-01-05 21:58 ` Eric Dumazet
2016-01-05 22:08 ` [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT Eric Dumazet
@ 2016-01-06 6:28 ` David Miller
2 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2016-01-06 6:28 UTC (permalink / raw)
To: kraigatgoog; +Cc: netdev
From: Craig Gallek <kraigatgoog@gmail.com>
Date: Tue, 5 Jan 2016 15:08:07 -0500
> From: Craig Gallek <kraig@google.com>
>
> This socket-lookup path did not pass along the skb in question
> in my original BPF-based socket selection patch. The skb in the
> udpN_lib_lookup2 path can be used for BPF-based socket selection just
> like it is in the 'traditional' udpN_lib_lookup path.
>
> udpN_lib_lookup2 kicks in when there are greater than 10 sockets in
> the same hlist slot. Coincidentally, I chose 10 sockets per
> reuseport group in my functional test, so the lookup2 path was not
> excersised. This adds an additional set of tests with 20 sockets.
>
> Fixes: 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF")
> Fixes: 3ca8e4029969 ("soreuseport: BPF selection functional test")
> Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
> Signed-off-by: Craig Gallek <kraig@google.com>
Applied, thanks Craig.
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2016-01-06 6:28 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-01-05 20:08 [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup Craig Gallek
2016-01-05 21:58 ` Eric Dumazet
2016-01-05 22:08 ` [PATCH net-next] udp_diag: fix udp_dump_one() vs SO_REUSEPORT Eric Dumazet
2016-01-05 22:22 ` Eric Dumazet
2016-01-05 22:26 ` Craig Gallek
2016-01-05 22:46 ` Eric Dumazet
2016-01-05 22:33 ` [PATCH v2 " Eric Dumazet
2016-01-05 22:56 ` Craig Gallek
2016-01-06 0:08 ` Eric Dumazet
2016-01-06 6:28 ` [PATCH net-next] soreuseport: pass skb to secondary UDP socket lookup David Miller
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).