* [PATCH 1/6 net-next] inet: collapse ipv4/v6 rcv_saddr_equal functions into one
2016-12-22 21:26 [PATCH 0/6 net-next][V2] Rework inet_csk_get_port Josef Bacik
@ 2016-12-22 21:26 ` Josef Bacik
0 siblings, 0 replies; 13+ messages in thread
From: Josef Bacik @ 2016-12-22 21:26 UTC (permalink / raw)
To: davem, hannes, kraigatgoog, eric.dumazet, tom, netdev,
kernel-team
We pass these per-protocol equal functions around in various places, but
we can just have one function that checks the sk->sk_family and then do
the right comparison function. I've also changed the ipv4 version to
not cast to inet_sock since it is unneeded.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
include/net/addrconf.h | 4 +--
include/net/inet_hashtables.h | 5 +--
include/net/udp.h | 1 -
net/ipv4/inet_connection_sock.c | 72 ++++++++++++++++++++++++++++++++++++++++
net/ipv4/inet_hashtables.c | 16 +++------
net/ipv4/udp.c | 58 +++++++-------------------------
net/ipv6/inet6_connection_sock.c | 4 +--
net/ipv6/inet6_hashtables.c | 46 +------------------------
net/ipv6/udp.c | 2 +-
9 files changed, 95 insertions(+), 113 deletions(-)
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 8f998af..17c6fd8 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -88,9 +88,7 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
u32 banned_flags);
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
-int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard);
-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
bool match_wildcard);
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 0574493..756ed16 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -203,10 +203,7 @@ void inet_hashinfo_init(struct inet_hashinfo *h);
bool inet_ehash_insert(struct sock *sk, struct sock *osk);
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
-int __inet_hash(struct sock *sk, struct sock *osk,
- int (*saddr_same)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard));
+int __inet_hash(struct sock *sk, struct sock *osk);
int inet_hash(struct sock *sk);
void inet_unhash(struct sock *sk);
diff --git a/include/net/udp.h b/include/net/udp.h
index 1661791..c9d8b8e 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -204,7 +204,6 @@ static inline void udp_lib_close(struct sock *sk, long timeout)
}
int udp_lib_get_port(struct sock *sk, unsigned short snum,
- int (*)(const struct sock *, const struct sock *, bool),
unsigned int hash2_nulladdr);
u32 udp_flow_hashrnd(void);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 19ea045..ba597cb 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -31,6 +31,78 @@ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
EXPORT_SYMBOL(inet_csk_timer_bug_msg);
#endif
+#if IS_ENABLED(CONFIG_IPV6)
+/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
+ * only, and any IPv4 addresses if not IPv6 only
+ * match_wildcard == false: addresses must be exactly the same, i.e.
+ * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
+ * and 0.0.0.0 equals to 0.0.0.0 only
+ */
+static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
+{
+ const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
+ int sk2_ipv6only = inet_v6_ipv6only(sk2);
+ int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+ int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
+
+ /* if both are mapped, treat as IPv4 */
+ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
+ if (!sk2_ipv6only) {
+ if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
+ return 1;
+ if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
+ return match_wildcard;
+ }
+ return 0;
+ }
+
+ if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
+ return 1;
+
+ if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
+ !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
+ !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (sk2_rcv_saddr6 &&
+ ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
+ return 1;
+
+ return 0;
+}
+#endif
+
+/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses
+ * match_wildcard == false: addresses must be exactly the same, i.e.
+ * 0.0.0.0 only equals to 0.0.0.0
+ */
+static int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
+{
+ if (!ipv6_only_sock(sk2)) {
+ if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
+ return 1;
+ if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
+ return match_wildcard;
+ }
+ return 0;
+}
+
+int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ return ipv6_rcv_saddr_equal(sk, sk2, match_wildcard);
+#endif
+ return ipv4_rcv_saddr_equal(sk, sk2, match_wildcard);
+}
+EXPORT_SYMBOL(inet_rcv_saddr_equal);
+
void inet_get_local_port_range(struct net *net, int *low, int *high)
{
unsigned int seq;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ca97835..2ef9b01 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -435,10 +435,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
static int inet_reuseport_add_sock(struct sock *sk,
- struct inet_listen_hashbucket *ilb,
- int (*saddr_same)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard))
+ struct inet_listen_hashbucket *ilb)
{
struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
struct sock *sk2;
@@ -451,7 +448,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
inet_csk(sk2)->icsk_bind_hash == tb &&
sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
- saddr_same(sk, sk2, false))
+ inet_rcv_saddr_equal(sk, sk2, false))
return reuseport_add_sock(sk, sk2);
}
@@ -461,10 +458,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
return 0;
}
-int __inet_hash(struct sock *sk, struct sock *osk,
- int (*saddr_same)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard))
+int __inet_hash(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb;
@@ -479,7 +473,7 @@ int __inet_hash(struct sock *sk, struct sock *osk,
spin_lock(&ilb->lock);
if (sk->sk_reuseport) {
- err = inet_reuseport_add_sock(sk, ilb, saddr_same);
+ err = inet_reuseport_add_sock(sk, ilb);
if (err)
goto unlock;
}
@@ -503,7 +497,7 @@ int inet_hash(struct sock *sk)
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
- err = __inet_hash(sk, NULL, ipv4_rcv_saddr_equal);
+ err = __inet_hash(sk, NULL);
local_bh_enable();
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9ca279b..cffbb65 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -137,11 +137,7 @@ EXPORT_SYMBOL(udp_memory_allocated);
static int udp_lib_lport_inuse(struct net *net, __u16 num,
const struct udp_hslot *hslot,
unsigned long *bitmap,
- struct sock *sk,
- int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard),
- unsigned int log)
+ struct sock *sk, unsigned int log)
{
struct sock *sk2;
kuid_t uid = sock_i_uid(sk);
@@ -156,7 +152,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
(!sk2->sk_reuseport || !sk->sk_reuseport ||
rcu_access_pointer(sk->sk_reuseport_cb) ||
!uid_eq(uid, sock_i_uid(sk2))) &&
- saddr_comp(sk, sk2, true)) {
+ inet_rcv_saddr_equal(sk, sk2, true)) {
if (!bitmap)
return 1;
__set_bit(udp_sk(sk2)->udp_port_hash >> log, bitmap);
@@ -171,10 +167,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
*/
static int udp_lib_lport_inuse2(struct net *net, __u16 num,
struct udp_hslot *hslot2,
- struct sock *sk,
- int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard))
+ struct sock *sk)
{
struct sock *sk2;
kuid_t uid = sock_i_uid(sk);
@@ -191,7 +184,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
(!sk2->sk_reuseport || !sk->sk_reuseport ||
rcu_access_pointer(sk->sk_reuseport_cb) ||
!uid_eq(uid, sock_i_uid(sk2))) &&
- saddr_comp(sk, sk2, true)) {
+ inet_rcv_saddr_equal(sk, sk2, true)) {
res = 1;
break;
}
@@ -200,10 +193,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
return res;
}
-static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
- int (*saddr_same)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard))
+static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
{
struct net *net = sock_net(sk);
kuid_t uid = sock_i_uid(sk);
@@ -217,7 +207,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
(udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) &&
(sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
- (*saddr_same)(sk, sk2, false)) {
+ inet_rcv_saddr_equal(sk, sk2, false)) {
return reuseport_add_sock(sk, sk2);
}
}
@@ -233,14 +223,10 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
*
* @sk: socket struct in question
* @snum: port number to look up
- * @saddr_comp: AF-dependent comparison of bound local IP addresses
* @hash2_nulladdr: AF-dependent hash value in secondary hash chains,
* with NULL address
*/
int udp_lib_get_port(struct sock *sk, unsigned short snum,
- int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard),
unsigned int hash2_nulladdr)
{
struct udp_hslot *hslot, *hslot2;
@@ -269,7 +255,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
bitmap_zero(bitmap, PORTS_PER_CHAIN);
spin_lock_bh(&hslot->lock);
udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
- saddr_comp, udptable->log);
+ udptable->log);
snum = first;
/*
@@ -301,12 +287,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
if (hslot->count < hslot2->count)
goto scan_primary_hash;
- exist = udp_lib_lport_inuse2(net, snum, hslot2,
- sk, saddr_comp);
+ exist = udp_lib_lport_inuse2(net, snum, hslot2, sk);
if (!exist && (hash2_nulladdr != slot2)) {
hslot2 = udp_hashslot2(udptable, hash2_nulladdr);
exist = udp_lib_lport_inuse2(net, snum, hslot2,
- sk, saddr_comp);
+ sk);
}
if (exist)
goto fail_unlock;
@@ -314,8 +299,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
goto found;
}
scan_primary_hash:
- if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,
- saddr_comp, 0))
+ if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, 0))
goto fail_unlock;
}
found:
@@ -324,7 +308,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
udp_sk(sk)->udp_portaddr_hash ^= snum;
if (sk_unhashed(sk)) {
if (sk->sk_reuseport &&
- udp_reuseport_add_sock(sk, hslot, saddr_comp)) {
+ udp_reuseport_add_sock(sk, hslot)) {
inet_sk(sk)->inet_num = 0;
udp_sk(sk)->udp_port_hash = 0;
udp_sk(sk)->udp_portaddr_hash ^= snum;
@@ -356,24 +340,6 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
}
EXPORT_SYMBOL(udp_lib_get_port);
-/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses
- * match_wildcard == false: addresses must be exactly the same, i.e.
- * 0.0.0.0 only equals to 0.0.0.0
- */
-int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
- bool match_wildcard)
-{
- struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
-
- if (!ipv6_only_sock(sk2)) {
- if (inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)
- return 1;
- if (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr)
- return match_wildcard;
- }
- return 0;
-}
-
static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
unsigned int port)
{
@@ -389,7 +355,7 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
- return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
+ return udp_lib_get_port(sk, snum, hash2_nulladdr);
}
static int compute_score(struct sock *sk, struct net *net,
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 7396e75..55ee2ea 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -54,12 +54,12 @@ int inet6_csk_bind_conflict(const struct sock *sk,
(sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid,
sock_i_uid((struct sock *)sk2))))) {
- if (ipv6_rcv_saddr_equal(sk, sk2, true))
+ if (inet_rcv_saddr_equal(sk, sk2, true))
break;
}
if (!relax && reuse && sk2->sk_reuse &&
sk2->sk_state != TCP_LISTEN &&
- ipv6_rcv_saddr_equal(sk, sk2, true))
+ inet_rcv_saddr_equal(sk, sk2, true))
break;
}
}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 02761c9..d090091 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -268,54 +268,10 @@ int inet6_hash(struct sock *sk)
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
- err = __inet_hash(sk, NULL, ipv6_rcv_saddr_equal);
+ err = __inet_hash(sk, NULL);
local_bh_enable();
}
return err;
}
EXPORT_SYMBOL_GPL(inet6_hash);
-
-/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
- * only, and any IPv4 addresses if not IPv6 only
- * match_wildcard == false: addresses must be exactly the same, i.e.
- * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
- * and 0.0.0.0 equals to 0.0.0.0 only
- */
-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard)
-{
- const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- int sk2_ipv6only = inet_v6_ipv6only(sk2);
- int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
- int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
-
- /* if both are mapped, treat as IPv4 */
- if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
- if (!sk2_ipv6only) {
- if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
- return 1;
- if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
- return match_wildcard;
- }
- return 0;
- }
-
- if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
- return 1;
-
- if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
- !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
- return 1;
-
- if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
- !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
- return 1;
-
- if (sk2_rcv_saddr6 &&
- ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
- return 1;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 649efc2..9d2886f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -103,7 +103,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
- return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
+ return udp_lib_get_port(sk, snum, hash2_nulladdr);
}
static void udp_v6_rehash(struct sock *sk)
--
2.5.5
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 0/6 net-next][V3] Rework inet_csk_get_port
@ 2017-01-11 20:06 Josef Bacik
2017-01-11 20:19 ` [PATCH 1/6 net-next] inet: collapse ipv4/v6 rcv_saddr_equal functions into one Josef Bacik
2017-01-11 20:22 ` [PATCH 2/6 net-next] inet: drop ->bind_conflict Josef Bacik
0 siblings, 2 replies; 13+ messages in thread
From: Josef Bacik @ 2017-01-11 20:06 UTC (permalink / raw)
To: davem, hannes, kraigatgoog, eric.dumazet, tom, netdev,
kernel-team
Sorry the holidays delayed this a bit. I've made the changes requested and I
also sat down and wrote a reproducer so everybody could see what was happening.
Without my patches my reproducer takes about 50 seconds to run on my 48 CPU
devel box. With my patches it takes 8 seconds.
V2->V3:
-Dropped the fastsock from the tb and instead just carry the saddrs, family, and
ipv6 only flag.
-Reworked the helper functions to deal with this change so I could still use
them when checking the fast path.
-Killed tb->num_owners as per Eric's request.
-Attached a reproducer to the bottom of this email.
V1->V2:
-Added a new patch 'inet: collapse ipv4/v6 rcv_saddr_equal functions into one'
at Hannes' suggestion.
-Dropped ->bind_conflict and just use the new helper.
-Fixed a compile bug from the original ->bind_conflict patch.
The original description of the series follows
=========================================================
At some point recently the guys working on our load balancer added the ability
to use SO_REUSEPORT. When they restarted their app with this option enabled
they immediately hit a softlockup on what appeared to be the
inet_bind_bucket->lock. Eventually what all of our debugging and discussion led
us to was the fact that the application comes up without SO_REUSEPORT, shuts
down which creates around 100k twsk's, and then comes up and tries to open a
bunch of sockets using SO_REUSEPORT, which meant traversing the inet_bind_bucket
owners list under the lock. Since this lock is needed for dealing with the
twsk's and basically anything else related to connections we would softlockup,
and sometimes not ever recover.
To solve this problem I did what you see in Path 5/5. Once we have a
SO_REUSEPORT socket on the tb->owners list we know that the socket has no
conflicts with any of the other sockets on that list. So we can add a copy of
the sock_common (really all we need is the recv_saddr but it seemed ugly to copy
just the ipv6, ipv4, and flag to indicate if we were ipv6 only in there so I've
copied the whole common) in order to check subsequent SO_REUSEPORT sockets. If
they match the previous one then we can skip the expensive
inet_csk_bind_conflict check. This is what eliminated the soft lockup that we
were seeing.
Patches 1-4 are cleanups and re-workings. For instance when we specify port ==
0 we need to find an open port, but we would do two passes through
inet_csk_bind_conflict every time we found a possible port. We would also keep
track of the smallest_port value in order to try and use it if we found no
port our first run through. This however made no sense as it would have had to
fail the first pass through inet_csk_bind_conflict, so would not actually pass
the second pass through either. Finally I split the function into two functions
in order to make it easier to read and to distinguish between the two behaviors.
I have tested this on one of our load balancing boxes during peak traffic and it
hasn't fallen over. But this is not my area, so obviously feel free to point
out where I'm being stupid and I'll get it fixed up and retested. Thanks,
Josef
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <sys/un.h>
#include <errno.h>
#include <netdb.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
static int ready = 0;
static int done = 0;
static void sigint_handler(int s)
{
ready = 1;
}
static int create_sock(int socktype, int soreuseport)
{
struct addrinfo hints;
struct addrinfo *ai, *ai_orig;
const char *addrs[] = {"::", "0.0.0.0", NULL};
const char *port = "9999";
int sock;
int num_socks = 0;
int i, ret;
memset(&hints, 0, sizeof(hints));
hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
hints.ai_socktype = SOCK_STREAM;
hints.ai_family = (socktype == 0) ? AF_INET6 : AF_INET;
hints.ai_protocol = IPPROTO_TCP;
ret = getaddrinfo(addrs[socktype], port, &hints, &ai);
if (ret < 0) {
fprintf(stderr, "couldn't get addr info %d\n", errno);
return -1;
}
ai_orig = ai;
while (ai != NULL) {
int yes = 1;
sock = socket(ai->ai_family, ai->ai_socktype,
ai->ai_protocol);
if (sock < 0) {
fprintf(stderr, "socket failed %d\n", errno);
goto next;
}
if (soreuseport) {
if (setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, &yes,
sizeof(yes)) < 0) {
fprintf(stderr, "setsockopt failed %d\n", errno);
close(sock);
goto next;
}
}
if (bind(sock, ai->ai_addr, ai->ai_addrlen)) {
fprintf(stderr, "bind failed %d\n", errno);
close(sock);
goto next;
}
if (listen(sock, 1024) < 0) {
fprintf(stderr, "listen failed %d\n", errno);
close(sock);
goto next;
}
num_socks++;
break;
next:
ai = ai->ai_next;
}
freeaddrinfo(ai_orig);
if (!num_socks) {
fprintf(stderr, "failed to open any sockets\n");
return -1;
}
return sock;
}
#define SOCK_PATH "THIS_SHIT_IS_FUCKING_INSANE"
#define TAKEOVER_REQUEST 1
#define CLOSE_REQUEST 2
static int *takeover_sockets(int *num_fds_ret)
{
struct sockaddr_un remote;
struct msghdr msg = {};
struct cmsghdr *cmsg;
struct iovec iov;
int *fds;
int req, num_fds, fd, i, len;
char *buf;
*num_fds_ret = 0;
if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
fprintf(stderr, "Couldn't open our unix sock\n");
return NULL;
}
remote.sun_family = AF_UNIX;
strcpy(remote.sun_path, SOCK_PATH);
len = strlen(remote.sun_path) + sizeof(remote.sun_family);
if (connect(fd, (struct sockaddr *)&remote, len) < 0) {
fprintf(stderr, "Couldn't connect, %d\n", errno);
close(fd);
return NULL;
}
req = TAKEOVER_REQUEST;
if (send(fd, &req, sizeof(req), 0) < 0) {
fprintf(stderr, "Couldn't send our request, %d\n", errno);
close(fd);
return NULL;
}
buf = malloc(CMSG_SPACE(sizeof(int) * 32));
if (!buf) {
fprintf(stderr, "Failed to allocate cmsg buffer\n");
free(fds);
close(fd);
return NULL;
}
iov.iov_base = &num_fds;
iov.iov_len = sizeof(num_fds);
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_control = buf;
msg.msg_controllen = CMSG_SPACE(sizeof(int) * 1);
if (recvmsg(fd, &msg, 0) < 0) {
fprintf(stderr, "Couldn't get our array of fd's, %d\n", errno);
free(fds);
close(fd);
return NULL;
}
fds = malloc(sizeof(int) * num_fds);
if (!fds) {
fprintf(stderr, "Couldn't allocate an array for %d fd's\n", num_fds);
close(fd);
return NULL;
}
for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
int *fdsptr = (int *)CMSG_DATA(cmsg);
for (i = 0; i < num_fds; i++) {
int newfd = fdsptr[i];
int yes = 1;
if (newfd < 0) {
fprintf(stderr, "Couldn't dup the old socket, %d\n", errno);
close(fd);
num_fds = i;
break;
}
if (setsockopt(newfd, SOL_SOCKET, SO_REUSEPORT, &yes,
sizeof(yes)) < 0) {
fprintf(stderr, "setsockopt failed on taken over socket %d\n",
errno);
close(newfd);
close(fd);
break;
}
fds[i] = newfd;
}
}
req = CLOSE_REQUEST;
if (send(fd, &req, sizeof(req), 0) < 0)
fprintf(stderr, "Couldn't send the close request, %d\n", errno);
close(fd);
*num_fds_ret = num_fds;
return fds;
}
static int listen_for_takeover(pid_t child, int *fds, int num_fds)
{
struct sockaddr_un local, remote;
int sock, len;
if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
fprintf(stderr, "Couldn't open unix sock %d\n", errno);
kill(child, SIGINT);
return -1;
}
local.sun_family = AF_UNIX;
strcpy(local.sun_path, SOCK_PATH);
unlink(local.sun_path);
len = strlen(local.sun_path) + sizeof(local.sun_family);
if (bind(sock, (struct sockaddr *)&local, len) < 0) {
fprintf(stderr, "Couldn't bind to unix sock %d\n", errno);
kill(child, SIGINT);
return -1;
}
if (listen(sock, 1) < 0) {
fprintf(stderr, "Couldn't listen on unix sock %d\n", errno);
kill(child, SIGINT);
return -1;
}
kill(child, SIGINT);
while (!done) {
int newsock, req, len = sizeof(remote);
if ((newsock = accept(sock, (struct sockaddr *)&remote,
&len)) < 0) {
fprintf(stderr, "Couldn't accept connection %d\n", errno);
return -1;
}
while (recv(newsock, &req, sizeof(req), 0) > 0) {
if (req == TAKEOVER_REQUEST) {
struct msghdr msg = {};
struct cmsghdr *cmsg;
struct iovec iov = {.iov_base = &num_fds, .iov_len = sizeof(num_fds), };
char *buf;
int *fdptr;
buf = malloc(CMSG_SPACE(sizeof(int) * num_fds));
if (!buf) {
fprintf(stderr, "Couldn't allocate cmsg buf\n");
return -1;
}
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_control = buf;
msg.msg_controllen = CMSG_SPACE(sizeof(int) * num_fds);
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(int) * num_fds);
fdptr = (int *)CMSG_DATA(cmsg);
memcpy(fdptr, fds, num_fds * sizeof(int));
msg.msg_controllen = cmsg->cmsg_len;
if (sendmsg(newsock, &msg, 0) < 0) {
fprintf(stderr, "Failed to send fds %d\n", errno);
return -1;
}
} else if (req == CLOSE_REQUEST) {
break;
}
}
done = 1;
}
return 0;
}
static void *thread_main(void *arg)
{
int i;
for (i = 0; i < 256; i++) {
int fd = create_sock(0, 1);
if (fd < 0)
return (void *)(unsigned long)fd;
}
return NULL;
}
static int do_spawn_threads(int num_threads)
{
struct sigaction sa;
pthread_t *threads;
int *fds;
int num_fds;
int error = 0;
int i;
threads = calloc(sizeof(pthread_t), num_threads);
if (!threads) {
fprintf(stderr, "Not enough memory\n");
return -1;
}
sa.sa_handler = sigint_handler;
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_RESTART;
if (sigaction(SIGINT, &sa, NULL) < 0) {
fprintf(stderr, "Couldn't set our sigaction\n");
return -1;
}
while (!ready)
sleep(1);
fds = takeover_sockets(&num_fds);
printf("Starting new threads\n");
for (i = 0; i < num_threads; i++) {
if (pthread_create(&threads[i], NULL, thread_main, NULL) < 0) {
done = 1;
break;
}
}
while (i--) {
void *retval;
pthread_join(threads[i], &retval);
if ((unsigned long)retval != 0)
error = (int)(unsigned long)retval;
}
return error;
}
int main(int argc, char **argv)
{
int *fds;
int num_fds;
pthread_t *threads;
unsigned long i;
int ret = -1;
int childstatus;
pid_t child;
child = fork();
if (child < 0) {
fprintf(stderr, "Failed to create child! %d\n", errno);
return -1;
}
if (!child) {
return do_spawn_threads(64);
}
fds = malloc(sizeof(int));
if (!fds) {
kill(child, SIGINT);
goto out;
}
fds[0] = create_sock(0, 0);
if (fds[0] < 0) {
free(fds);
kill(child, SIGINT);
fprintf(stderr, "Couldn't create a socket\n");
goto out;
}
ret = listen_for_takeover(child, fds, 1);
out:
waitpid(child, &childstatus, 0);
if (!ret)
ret = WEXITSTATUS(childstatus);
return ret;
}
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 1/6 net-next] inet: collapse ipv4/v6 rcv_saddr_equal functions into one
2017-01-11 20:06 [PATCH 0/6 net-next][V3] Rework inet_csk_get_port Josef Bacik
@ 2017-01-11 20:19 ` Josef Bacik
2017-01-11 20:22 ` [PATCH 2/6 net-next] inet: drop ->bind_conflict Josef Bacik
1 sibling, 0 replies; 13+ messages in thread
From: Josef Bacik @ 2017-01-11 20:19 UTC (permalink / raw)
To: davem, hannes, kraigatgoog, eric.dumazet, tom, netdev,
kernel-team
We pass these per-protocol equal functions around in various places, but
we can just have one function that checks the sk->sk_family and then do
the right comparison function. I've also changed the ipv4 version to
not cast to inet_sock since it is unneeded.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
include/net/addrconf.h | 4 +--
include/net/inet_hashtables.h | 5 +--
include/net/udp.h | 1 -
net/ipv4/inet_connection_sock.c | 72 ++++++++++++++++++++++++++++++++++++++++
net/ipv4/inet_hashtables.c | 16 +++------
net/ipv4/udp.c | 58 +++++++-------------------------
net/ipv6/inet6_connection_sock.c | 4 +--
net/ipv6/inet6_hashtables.c | 46 +------------------------
net/ipv6/udp.c | 2 +-
9 files changed, 95 insertions(+), 113 deletions(-)
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 8f998af..17c6fd8 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -88,9 +88,7 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
u32 banned_flags);
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
-int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard);
-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
bool match_wildcard);
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 0574493..756ed16 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -203,10 +203,7 @@ void inet_hashinfo_init(struct inet_hashinfo *h);
bool inet_ehash_insert(struct sock *sk, struct sock *osk);
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
-int __inet_hash(struct sock *sk, struct sock *osk,
- int (*saddr_same)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard));
+int __inet_hash(struct sock *sk, struct sock *osk);
int inet_hash(struct sock *sk);
void inet_unhash(struct sock *sk);
diff --git a/include/net/udp.h b/include/net/udp.h
index 1661791..c9d8b8e 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -204,7 +204,6 @@ static inline void udp_lib_close(struct sock *sk, long timeout)
}
int udp_lib_get_port(struct sock *sk, unsigned short snum,
- int (*)(const struct sock *, const struct sock *, bool),
unsigned int hash2_nulladdr);
u32 udp_flow_hashrnd(void);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 19ea045..ba597cb 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -31,6 +31,78 @@ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
EXPORT_SYMBOL(inet_csk_timer_bug_msg);
#endif
+#if IS_ENABLED(CONFIG_IPV6)
+/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
+ * only, and any IPv4 addresses if not IPv6 only
+ * match_wildcard == false: addresses must be exactly the same, i.e.
+ * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
+ * and 0.0.0.0 equals to 0.0.0.0 only
+ */
+static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
+{
+ const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
+ int sk2_ipv6only = inet_v6_ipv6only(sk2);
+ int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+ int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
+
+ /* if both are mapped, treat as IPv4 */
+ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
+ if (!sk2_ipv6only) {
+ if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
+ return 1;
+ if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
+ return match_wildcard;
+ }
+ return 0;
+ }
+
+ if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
+ return 1;
+
+ if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
+ !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
+ !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (sk2_rcv_saddr6 &&
+ ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
+ return 1;
+
+ return 0;
+}
+#endif
+
+/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses
+ * match_wildcard == false: addresses must be exactly the same, i.e.
+ * 0.0.0.0 only equals to 0.0.0.0
+ */
+static int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
+{
+ if (!ipv6_only_sock(sk2)) {
+ if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
+ return 1;
+ if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
+ return match_wildcard;
+ }
+ return 0;
+}
+
+int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ return ipv6_rcv_saddr_equal(sk, sk2, match_wildcard);
+#endif
+ return ipv4_rcv_saddr_equal(sk, sk2, match_wildcard);
+}
+EXPORT_SYMBOL(inet_rcv_saddr_equal);
+
void inet_get_local_port_range(struct net *net, int *low, int *high)
{
unsigned int seq;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ca97835..2ef9b01 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -435,10 +435,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
static int inet_reuseport_add_sock(struct sock *sk,
- struct inet_listen_hashbucket *ilb,
- int (*saddr_same)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard))
+ struct inet_listen_hashbucket *ilb)
{
struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
struct sock *sk2;
@@ -451,7 +448,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
inet_csk(sk2)->icsk_bind_hash == tb &&
sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
- saddr_same(sk, sk2, false))
+ inet_rcv_saddr_equal(sk, sk2, false))
return reuseport_add_sock(sk, sk2);
}
@@ -461,10 +458,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
return 0;
}
-int __inet_hash(struct sock *sk, struct sock *osk,
- int (*saddr_same)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard))
+int __inet_hash(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb;
@@ -479,7 +473,7 @@ int __inet_hash(struct sock *sk, struct sock *osk,
spin_lock(&ilb->lock);
if (sk->sk_reuseport) {
- err = inet_reuseport_add_sock(sk, ilb, saddr_same);
+ err = inet_reuseport_add_sock(sk, ilb);
if (err)
goto unlock;
}
@@ -503,7 +497,7 @@ int inet_hash(struct sock *sk)
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
- err = __inet_hash(sk, NULL, ipv4_rcv_saddr_equal);
+ err = __inet_hash(sk, NULL);
local_bh_enable();
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4318d72..d6dddcf 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -137,11 +137,7 @@ EXPORT_SYMBOL(udp_memory_allocated);
static int udp_lib_lport_inuse(struct net *net, __u16 num,
const struct udp_hslot *hslot,
unsigned long *bitmap,
- struct sock *sk,
- int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard),
- unsigned int log)
+ struct sock *sk, unsigned int log)
{
struct sock *sk2;
kuid_t uid = sock_i_uid(sk);
@@ -153,7 +149,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
(!sk2->sk_reuse || !sk->sk_reuse) &&
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- saddr_comp(sk, sk2, true)) {
+ inet_rcv_saddr_equal(sk, sk2, true)) {
if (sk2->sk_reuseport && sk->sk_reuseport &&
!rcu_access_pointer(sk->sk_reuseport_cb) &&
uid_eq(uid, sock_i_uid(sk2))) {
@@ -176,10 +172,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
*/
static int udp_lib_lport_inuse2(struct net *net, __u16 num,
struct udp_hslot *hslot2,
- struct sock *sk,
- int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard))
+ struct sock *sk)
{
struct sock *sk2;
kuid_t uid = sock_i_uid(sk);
@@ -193,7 +186,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
(!sk2->sk_reuse || !sk->sk_reuse) &&
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- saddr_comp(sk, sk2, true)) {
+ inet_rcv_saddr_equal(sk, sk2, true)) {
if (sk2->sk_reuseport && sk->sk_reuseport &&
!rcu_access_pointer(sk->sk_reuseport_cb) &&
uid_eq(uid, sock_i_uid(sk2))) {
@@ -208,10 +201,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
return res;
}
-static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
- int (*saddr_same)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard))
+static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
{
struct net *net = sock_net(sk);
kuid_t uid = sock_i_uid(sk);
@@ -225,7 +215,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
(udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) &&
(sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
- (*saddr_same)(sk, sk2, false)) {
+ inet_rcv_saddr_equal(sk, sk2, false)) {
return reuseport_add_sock(sk, sk2);
}
}
@@ -241,14 +231,10 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
*
* @sk: socket struct in question
* @snum: port number to look up
- * @saddr_comp: AF-dependent comparison of bound local IP addresses
* @hash2_nulladdr: AF-dependent hash value in secondary hash chains,
* with NULL address
*/
int udp_lib_get_port(struct sock *sk, unsigned short snum,
- int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard),
unsigned int hash2_nulladdr)
{
struct udp_hslot *hslot, *hslot2;
@@ -277,7 +263,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
bitmap_zero(bitmap, PORTS_PER_CHAIN);
spin_lock_bh(&hslot->lock);
udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
- saddr_comp, udptable->log);
+ udptable->log);
snum = first;
/*
@@ -310,12 +296,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
if (hslot->count < hslot2->count)
goto scan_primary_hash;
- exist = udp_lib_lport_inuse2(net, snum, hslot2,
- sk, saddr_comp);
+ exist = udp_lib_lport_inuse2(net, snum, hslot2, sk);
if (!exist && (hash2_nulladdr != slot2)) {
hslot2 = udp_hashslot2(udptable, hash2_nulladdr);
exist = udp_lib_lport_inuse2(net, snum, hslot2,
- sk, saddr_comp);
+ sk);
}
if (exist)
goto fail_unlock;
@@ -323,8 +308,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
goto found;
}
scan_primary_hash:
- if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,
- saddr_comp, 0))
+ if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, 0))
goto fail_unlock;
}
found:
@@ -333,7 +317,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
udp_sk(sk)->udp_portaddr_hash ^= snum;
if (sk_unhashed(sk)) {
if (sk->sk_reuseport &&
- udp_reuseport_add_sock(sk, hslot, saddr_comp)) {
+ udp_reuseport_add_sock(sk, hslot)) {
inet_sk(sk)->inet_num = 0;
udp_sk(sk)->udp_port_hash = 0;
udp_sk(sk)->udp_portaddr_hash ^= snum;
@@ -365,24 +349,6 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
}
EXPORT_SYMBOL(udp_lib_get_port);
-/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses
- * match_wildcard == false: addresses must be exactly the same, i.e.
- * 0.0.0.0 only equals to 0.0.0.0
- */
-int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
- bool match_wildcard)
-{
- struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
-
- if (!ipv6_only_sock(sk2)) {
- if (inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)
- return 1;
- if (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr)
- return match_wildcard;
- }
- return 0;
-}
-
static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
unsigned int port)
{
@@ -398,7 +364,7 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
- return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
+ return udp_lib_get_port(sk, snum, hash2_nulladdr);
}
static int compute_score(struct sock *sk, struct net *net,
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 7396e75..55ee2ea 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -54,12 +54,12 @@ int inet6_csk_bind_conflict(const struct sock *sk,
(sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid,
sock_i_uid((struct sock *)sk2))))) {
- if (ipv6_rcv_saddr_equal(sk, sk2, true))
+ if (inet_rcv_saddr_equal(sk, sk2, true))
break;
}
if (!relax && reuse && sk2->sk_reuse &&
sk2->sk_state != TCP_LISTEN &&
- ipv6_rcv_saddr_equal(sk, sk2, true))
+ inet_rcv_saddr_equal(sk, sk2, true))
break;
}
}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 02761c9..d090091 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -268,54 +268,10 @@ int inet6_hash(struct sock *sk)
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
- err = __inet_hash(sk, NULL, ipv6_rcv_saddr_equal);
+ err = __inet_hash(sk, NULL);
local_bh_enable();
}
return err;
}
EXPORT_SYMBOL_GPL(inet6_hash);
-
-/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
- * only, and any IPv4 addresses if not IPv6 only
- * match_wildcard == false: addresses must be exactly the same, i.e.
- * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
- * and 0.0.0.0 equals to 0.0.0.0 only
- */
-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard)
-{
- const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- int sk2_ipv6only = inet_v6_ipv6only(sk2);
- int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
- int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
-
- /* if both are mapped, treat as IPv4 */
- if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
- if (!sk2_ipv6only) {
- if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
- return 1;
- if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
- return match_wildcard;
- }
- return 0;
- }
-
- if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
- return 1;
-
- if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
- !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
- return 1;
-
- if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
- !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
- return 1;
-
- if (sk2_rcv_saddr6 &&
- ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
- return 1;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4d5c4ee..05d6932 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -103,7 +103,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
- return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
+ return udp_lib_get_port(sk, snum, hash2_nulladdr);
}
static void udp_v6_rehash(struct sock *sk)
--
2.5.5
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 2/6 net-next] inet: drop ->bind_conflict
2017-01-11 20:06 [PATCH 0/6 net-next][V3] Rework inet_csk_get_port Josef Bacik
2017-01-11 20:19 ` [PATCH 1/6 net-next] inet: collapse ipv4/v6 rcv_saddr_equal functions into one Josef Bacik
@ 2017-01-11 20:22 ` Josef Bacik
2017-01-11 20:22 ` [PATCH 3/6 net-next] inet: kill smallest_size and smallest_port Josef Bacik
` (4 more replies)
1 sibling, 5 replies; 13+ messages in thread
From: Josef Bacik @ 2017-01-11 20:22 UTC (permalink / raw)
To: davem, hannes, kraigatgoog, eric.dumazet, tom, netdev,
kernel-team
The only difference between inet6_csk_bind_conflict and inet_csk_bind_conflict
is how they check the rcv_saddr, so delete this call back and simply
change inet_csk_bind_conflict to call inet_rcv_saddr_equal.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
include/net/inet6_connection_sock.h | 5 -----
include/net/inet_connection_sock.h | 6 ------
net/dccp/ipv4.c | 2 +-
net/dccp/ipv6.c | 2 --
net/ipv4/inet_connection_sock.c | 22 +++++++-------------
net/ipv4/tcp_ipv4.c | 2 +-
net/ipv6/inet6_connection_sock.c | 40 -------------------------------------
net/ipv6/tcp_ipv6.c | 2 --
8 files changed, 9 insertions(+), 72 deletions(-)
diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index 3212b39..8ec87b6 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -15,16 +15,11 @@
#include <linux/types.h>
-struct inet_bind_bucket;
struct request_sock;
struct sk_buff;
struct sock;
struct sockaddr;
-int inet6_csk_bind_conflict(const struct sock *sk,
- const struct inet_bind_bucket *tb, bool relax,
- bool soreuseport_ok);
-
struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6,
const struct request_sock *req, u8 proto);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 85ee387..add75c7 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -62,9 +62,6 @@ struct inet_connection_sock_af_ops {
char __user *optval, int __user *optlen);
#endif
void (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
- int (*bind_conflict)(const struct sock *sk,
- const struct inet_bind_bucket *tb,
- bool relax, bool soreuseport_ok);
void (*mtu_reduced)(struct sock *sk);
};
@@ -261,9 +258,6 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
-int inet_csk_bind_conflict(const struct sock *sk,
- const struct inet_bind_bucket *tb, bool relax,
- bool soreuseport_ok);
int inet_csk_get_port(struct sock *sk, unsigned short snum);
struct dst_entry *inet_csk_route_req(const struct sock *sk, struct flowi4 *fl4,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index d859a5c..ed6f99b 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -17,6 +17,7 @@
#include <linux/skbuff.h>
#include <linux/random.h>
+#include <net/addrconf.h>
#include <net/icmp.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
@@ -904,7 +905,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
.getsockopt = ip_getsockopt,
.addr2sockaddr = inet_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in),
- .bind_conflict = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index adfc790..08bcdc3 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -937,7 +937,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = {
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
- .bind_conflict = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
@@ -958,7 +957,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
- .bind_conflict = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index ba597cb..a1c9055 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -116,9 +116,9 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
}
EXPORT_SYMBOL(inet_get_local_port_range);
-int inet_csk_bind_conflict(const struct sock *sk,
- const struct inet_bind_bucket *tb, bool relax,
- bool reuseport_ok)
+static int inet_csk_bind_conflict(const struct sock *sk,
+ const struct inet_bind_bucket *tb,
+ bool relax, bool reuseport_ok)
{
struct sock *sk2;
bool reuse = sk->sk_reuse;
@@ -134,7 +134,6 @@ int inet_csk_bind_conflict(const struct sock *sk,
sk_for_each_bound(sk2, &tb->owners) {
if (sk != sk2 &&
- !inet_v6_ipv6only(sk2) &&
(!sk->sk_bound_dev_if ||
!sk2->sk_bound_dev_if ||
sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
@@ -144,23 +143,18 @@ int inet_csk_bind_conflict(const struct sock *sk,
rcu_access_pointer(sk->sk_reuseport_cb) ||
(sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid, sock_i_uid(sk2))))) {
-
- if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
- sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
+ if (inet_rcv_saddr_equal(sk, sk2, true))
break;
}
if (!relax && reuse && sk2->sk_reuse &&
sk2->sk_state != TCP_LISTEN) {
-
- if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
- sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
+ if (inet_rcv_saddr_equal(sk, sk2, true))
break;
}
}
}
return sk2 != NULL;
}
-EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
/* Obtain a reference to a local port for the given sock,
* if snum is zero it means select any available local port.
@@ -239,8 +233,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
smallest_size = tb->num_owners;
smallest_port = port;
}
- if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false,
- reuseport_ok))
+ if (!inet_csk_bind_conflict(sk, tb, false, reuseport_ok))
goto tb_found;
goto next_port;
}
@@ -281,8 +274,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
smallest_size == -1)
goto success;
- if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true,
- reuseport_ok)) {
+ if (inet_csk_bind_conflict(sk, tb, true, reuseport_ok)) {
if ((reuse ||
(tb->fastreuseport > 0 &&
sk->sk_reuseport &&
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 56d756e..dc07734 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -63,6 +63,7 @@
#include <linux/times.h>
#include <linux/slab.h>
+#include <net/addrconf.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
@@ -1817,7 +1818,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
.getsockopt = ip_getsockopt,
.addr2sockaddr = inet_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in),
- .bind_conflict = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 55ee2ea..97074c4 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -28,46 +28,6 @@
#include <net/inet6_connection_sock.h>
#include <net/sock_reuseport.h>
-int inet6_csk_bind_conflict(const struct sock *sk,
- const struct inet_bind_bucket *tb, bool relax,
- bool reuseport_ok)
-{
- const struct sock *sk2;
- bool reuse = !!sk->sk_reuse;
- bool reuseport = !!sk->sk_reuseport && reuseport_ok;
- kuid_t uid = sock_i_uid((struct sock *)sk);
-
- /* We must walk the whole port owner list in this case. -DaveM */
- /*
- * See comment in inet_csk_bind_conflict about sock lookup
- * vs net namespaces issues.
- */
- sk_for_each_bound(sk2, &tb->owners) {
- if (sk != sk2 &&
- (!sk->sk_bound_dev_if ||
- !sk2->sk_bound_dev_if ||
- sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
- if ((!reuse || !sk2->sk_reuse ||
- sk2->sk_state == TCP_LISTEN) &&
- (!reuseport || !sk2->sk_reuseport ||
- rcu_access_pointer(sk->sk_reuseport_cb) ||
- (sk2->sk_state != TCP_TIME_WAIT &&
- !uid_eq(uid,
- sock_i_uid((struct sock *)sk2))))) {
- if (inet_rcv_saddr_equal(sk, sk2, true))
- break;
- }
- if (!relax && reuse && sk2->sk_reuse &&
- sk2->sk_state != TCP_LISTEN &&
- inet_rcv_saddr_equal(sk, sk2, true))
- break;
- }
- }
-
- return sk2 != NULL;
-}
-EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
-
struct dst_entry *inet6_csk_route_req(const struct sock *sk,
struct flowi6 *fl6,
const struct request_sock *req,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 228965d..13ba53a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1621,7 +1621,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
- .bind_conflict = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
@@ -1652,7 +1651,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
- .bind_conflict = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
--
2.5.5
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 3/6 net-next] inet: kill smallest_size and smallest_port
2017-01-11 20:22 ` [PATCH 2/6 net-next] inet: drop ->bind_conflict Josef Bacik
@ 2017-01-11 20:22 ` Josef Bacik
2017-01-11 20:22 ` [PATCH 4/6 net-next] inet: don't check for bind conflicts twice when searching for a port Josef Bacik
` (3 subsequent siblings)
4 siblings, 0 replies; 13+ messages in thread
From: Josef Bacik @ 2017-01-11 20:22 UTC (permalink / raw)
To: davem, hannes, kraigatgoog, eric.dumazet, tom, netdev,
kernel-team
In inet_csk_get_port we seem to be using smallest_port to figure out where the
best place to look for a SO_REUSEPORT sk that matches with an existing set of
SO_REUSEPORT's. However if we get to the logic
if (smallest_size != -1) {
port = smallest_port;
goto have_port;
}
we will do a useless search, because we would have already done the
inet_csk_bind_conflict for that port and it would have returned 1, otherwise we
would have gone to found_tb and succeeded. Since this logic makes us do yet
another trip through inet_csk_bind_conflict for a port we know won't work just
delete this code and save us the time.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
include/net/inet_hashtables.h | 1 -
net/ipv4/inet_connection_sock.c | 26 ++++----------------------
net/ipv4/inet_hashtables.c | 3 ---
3 files changed, 4 insertions(+), 26 deletions(-)
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 756ed16..3fc0366 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -80,7 +80,6 @@ struct inet_bind_bucket {
signed char fastreuse;
signed char fastreuseport;
kuid_t fastuid;
- int num_owners;
struct hlist_node node;
struct hlist_head owners;
};
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a1c9055..d352366 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -165,7 +165,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
int ret = 1, attempts = 5, port = snum;
- int smallest_size = -1, smallest_port;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
int i, low, high, attempt_half;
@@ -175,7 +174,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
bool reuseport_ok = !!snum;
if (port) {
-have_port:
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
@@ -209,8 +207,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
* We do the opposite to not pollute connect() users.
*/
offset |= 1U;
- smallest_size = -1;
- smallest_port = low; /* avoid compiler warning */
other_parity_scan:
port = low + offset;
@@ -224,15 +220,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
spin_lock_bh(&head->lock);
inet_bind_bucket_for_each(tb, &head->chain)
if (net_eq(ib_net(tb), net) && tb->port == port) {
- if (((tb->fastreuse > 0 && reuse) ||
- (tb->fastreuseport > 0 &&
- sk->sk_reuseport &&
- !rcu_access_pointer(sk->sk_reuseport_cb) &&
- uid_eq(tb->fastuid, uid))) &&
- (tb->num_owners < smallest_size || smallest_size == -1)) {
- smallest_size = tb->num_owners;
- smallest_port = port;
- }
if (!inet_csk_bind_conflict(sk, tb, false, reuseport_ok))
goto tb_found;
goto next_port;
@@ -243,10 +230,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
cond_resched();
}
- if (smallest_size != -1) {
- port = smallest_port;
- goto have_port;
- }
offset--;
if (!(offset & 1))
goto other_parity_scan;
@@ -268,19 +251,18 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
if (sk->sk_reuse == SK_FORCE_REUSE)
goto success;
- if (((tb->fastreuse > 0 && reuse) ||
+ if ((tb->fastreuse > 0 && reuse) ||
(tb->fastreuseport > 0 &&
!rcu_access_pointer(sk->sk_reuseport_cb) &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
- smallest_size == -1)
+ sk->sk_reuseport && uid_eq(tb->fastuid, uid)))
goto success;
if (inet_csk_bind_conflict(sk, tb, true, reuseport_ok)) {
if ((reuse ||
(tb->fastreuseport > 0 &&
sk->sk_reuseport &&
!rcu_access_pointer(sk->sk_reuseport_cb) &&
- uid_eq(tb->fastuid, uid))) &&
- !snum && smallest_size != -1 && --attempts >= 0) {
+ uid_eq(tb->fastuid, uid))) && !snum &&
+ --attempts >= 0) {
spin_unlock_bh(&head->lock);
goto again;
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2ef9b01..8bea742 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -73,7 +73,6 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
tb->port = snum;
tb->fastreuse = 0;
tb->fastreuseport = 0;
- tb->num_owners = 0;
INIT_HLIST_HEAD(&tb->owners);
hlist_add_head(&tb->node, &head->chain);
}
@@ -96,7 +95,6 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
{
inet_sk(sk)->inet_num = snum;
sk_add_bind_node(sk, &tb->owners);
- tb->num_owners++;
inet_csk(sk)->icsk_bind_hash = tb;
}
@@ -114,7 +112,6 @@ static void __inet_put_port(struct sock *sk)
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
__sk_del_bind_node(sk);
- tb->num_owners--;
inet_csk(sk)->icsk_bind_hash = NULL;
inet_sk(sk)->inet_num = 0;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
--
2.5.5
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 4/6 net-next] inet: don't check for bind conflicts twice when searching for a port
2017-01-11 20:22 ` [PATCH 2/6 net-next] inet: drop ->bind_conflict Josef Bacik
2017-01-11 20:22 ` [PATCH 3/6 net-next] inet: kill smallest_size and smallest_port Josef Bacik
@ 2017-01-11 20:22 ` Josef Bacik
2017-01-11 20:22 ` [PATCH 5/6 net-next] inet: split inet_csk_get_port into two functions Josef Bacik
` (2 subsequent siblings)
4 siblings, 0 replies; 13+ messages in thread
From: Josef Bacik @ 2017-01-11 20:22 UTC (permalink / raw)
To: davem, hannes, kraigatgoog, eric.dumazet, tom, netdev,
kernel-team
This is just wasted time, we've already found a tb that doesn't have a bind
conflict, and we don't drop the head lock so scanning again isn't going to give
us a different answer. Instead move the tb->reuse setting logic outside of the
found_tb path and put it in the success: path. Then make it so that we don't
goto again if we find a bind conflict in the found_tb path as we won't reach
this anymore when we are scanning for an ephemeral port.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
net/ipv4/inet_connection_sock.c | 31 +++++++++++--------------------
1 file changed, 11 insertions(+), 20 deletions(-)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d352366..f7e844d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -164,7 +164,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- int ret = 1, attempts = 5, port = snum;
+ int ret = 1, port = snum;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
int i, low, high, attempt_half;
@@ -183,7 +183,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
goto tb_not_found;
}
-again:
attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
inet_get_local_port_range(net, &low, &high);
@@ -221,7 +220,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
inet_bind_bucket_for_each(tb, &head->chain)
if (net_eq(ib_net(tb), net) && tb->port == port) {
if (!inet_csk_bind_conflict(sk, tb, false, reuseport_ok))
- goto tb_found;
+ goto success;
goto next_port;
}
goto tb_not_found;
@@ -256,23 +255,11 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
!rcu_access_pointer(sk->sk_reuseport_cb) &&
sk->sk_reuseport && uid_eq(tb->fastuid, uid)))
goto success;
- if (inet_csk_bind_conflict(sk, tb, true, reuseport_ok)) {
- if ((reuse ||
- (tb->fastreuseport > 0 &&
- sk->sk_reuseport &&
- !rcu_access_pointer(sk->sk_reuseport_cb) &&
- uid_eq(tb->fastuid, uid))) && !snum &&
- --attempts >= 0) {
- spin_unlock_bh(&head->lock);
- goto again;
- }
+ if (inet_csk_bind_conflict(sk, tb, true, reuseport_ok))
goto fail_unlock;
- }
- if (!reuse)
- tb->fastreuse = 0;
- if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
- tb->fastreuseport = 0;
- } else {
+ }
+success:
+ if (!hlist_empty(&tb->owners)) {
tb->fastreuse = reuse;
if (sk->sk_reuseport) {
tb->fastreuseport = 1;
@@ -280,8 +267,12 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
} else {
tb->fastreuseport = 0;
}
+ } else {
+ if (!reuse)
+ tb->fastreuse = 0;
+ if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
+ tb->fastreuseport = 0;
}
-success:
if (!inet_csk(sk)->icsk_bind_hash)
inet_bind_hash(sk, tb, port);
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
--
2.5.5
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 5/6 net-next] inet: split inet_csk_get_port into two functions
2017-01-11 20:22 ` [PATCH 2/6 net-next] inet: drop ->bind_conflict Josef Bacik
2017-01-11 20:22 ` [PATCH 3/6 net-next] inet: kill smallest_size and smallest_port Josef Bacik
2017-01-11 20:22 ` [PATCH 4/6 net-next] inet: don't check for bind conflicts twice when searching for a port Josef Bacik
@ 2017-01-11 20:22 ` Josef Bacik
2017-01-11 20:22 ` [PATCH 6/6 net-next] inet: reset tb->fastreuseport when adding a reuseport sk Josef Bacik
2017-01-12 19:56 ` [PATCH 2/6 net-next] inet: drop ->bind_conflict David Miller
4 siblings, 0 replies; 13+ messages in thread
From: Josef Bacik @ 2017-01-11 20:22 UTC (permalink / raw)
To: davem, hannes, kraigatgoog, eric.dumazet, tom, netdev,
kernel-team
inet_csk_get_port does two different things, it either scans for an open port,
or it tries to see if the specified port is available for use. Since these two
operations have different rules and are basically independent lets split them
into two different functions to make them both more readable.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
net/ipv4/inet_connection_sock.c | 66 +++++++++++++++++++++++++++--------------
1 file changed, 44 insertions(+), 22 deletions(-)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f7e844d..bbe2892 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -156,33 +156,21 @@ static int inet_csk_bind_conflict(const struct sock *sk,
return sk2 != NULL;
}
-/* Obtain a reference to a local port for the given sock,
- * if snum is zero it means select any available local port.
- * We try to allocate an odd port (and leave even ports for connect())
+/*
+ * Find an open port number for the socket. Returns with the
+ * inet_bind_hashbucket lock held.
*/
-int inet_csk_get_port(struct sock *sk, unsigned short snum)
+static struct inet_bind_hashbucket *
+inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
{
- bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- int ret = 1, port = snum;
+ int port = 0;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
int i, low, high, attempt_half;
struct inet_bind_bucket *tb;
- kuid_t uid = sock_i_uid(sk);
u32 remaining, offset;
- bool reuseport_ok = !!snum;
- if (port) {
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- spin_lock_bh(&head->lock);
- inet_bind_bucket_for_each(tb, &head->chain)
- if (net_eq(ib_net(tb), net) && tb->port == port)
- goto tb_found;
-
- goto tb_not_found;
- }
attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
inet_get_local_port_range(net, &low, &high);
@@ -219,11 +207,12 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
spin_lock_bh(&head->lock);
inet_bind_bucket_for_each(tb, &head->chain)
if (net_eq(ib_net(tb), net) && tb->port == port) {
- if (!inet_csk_bind_conflict(sk, tb, false, reuseport_ok))
+ if (!inet_csk_bind_conflict(sk, tb, false, false))
goto success;
goto next_port;
}
- goto tb_not_found;
+ tb = NULL;
+ goto success;
next_port:
spin_unlock_bh(&head->lock);
cond_resched();
@@ -238,8 +227,41 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
attempt_half = 2;
goto other_half_scan;
}
- return ret;
+ return NULL;
+success:
+ *port_ret = port;
+ *tb_ret = tb;
+ return head;
+}
+
+/* Obtain a reference to a local port for the given sock,
+ * if snum is zero it means select any available local port.
+ * We try to allocate an odd port (and leave even ports for connect())
+ */
+int inet_csk_get_port(struct sock *sk, unsigned short snum)
+{
+ bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
+ struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
+ int ret = 1, port = snum;
+ struct inet_bind_hashbucket *head;
+ struct net *net = sock_net(sk);
+ struct inet_bind_bucket *tb = NULL;
+ kuid_t uid = sock_i_uid(sk);
+ if (!port) {
+ head = inet_csk_find_open_port(sk, &tb, &port);
+ if (!head)
+ return ret;
+ if (!tb)
+ goto tb_not_found;
+ goto success;
+ }
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
+ inet_bind_bucket_for_each(tb, &head->chain)
+ if (net_eq(ib_net(tb), net) && tb->port == port)
+ goto tb_found;
tb_not_found:
tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
net, head, port);
@@ -255,7 +277,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
!rcu_access_pointer(sk->sk_reuseport_cb) &&
sk->sk_reuseport && uid_eq(tb->fastuid, uid)))
goto success;
- if (inet_csk_bind_conflict(sk, tb, true, reuseport_ok))
+ if (inet_csk_bind_conflict(sk, tb, true, true))
goto fail_unlock;
}
success:
--
2.5.5
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 6/6 net-next] inet: reset tb->fastreuseport when adding a reuseport sk
2017-01-11 20:22 ` [PATCH 2/6 net-next] inet: drop ->bind_conflict Josef Bacik
` (2 preceding siblings ...)
2017-01-11 20:22 ` [PATCH 5/6 net-next] inet: split inet_csk_get_port into two functions Josef Bacik
@ 2017-01-11 20:22 ` Josef Bacik
2017-01-12 19:56 ` [PATCH 2/6 net-next] inet: drop ->bind_conflict David Miller
4 siblings, 0 replies; 13+ messages in thread
From: Josef Bacik @ 2017-01-11 20:22 UTC (permalink / raw)
To: davem, hannes, kraigatgoog, eric.dumazet, tom, netdev,
kernel-team
If we have non reuseport sockets on a tb we will set tb->fastreuseport to 0 and
never set it again. Which means that in the future if we end up adding a bunch
of reuseport sk's to that tb we'll have to do the expensive scan every time.
Instead add the ipv4/ipv6 saddr fields to the bind bucket, as well as the family
so we know what comparison to make, and the ipv6 only setting so we can make
sure to compare with new sockets appropriately. Once one sk has made it onto
the list we know that there are no potential bind conflicts on the owners list
that match that sk's rcv_addr. So copy the sk's information into our bind
bucket and set tb->fastruseport to FASTREUSESOCK_STRICT so we know we have to do
an extra check for subsequent reuseport sockets and skip the expensive bind
conflict check.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
include/net/inet_hashtables.h | 9 ++++
net/ipv4/inet_connection_sock.c | 106 ++++++++++++++++++++++++++++++++--------
2 files changed, 95 insertions(+), 20 deletions(-)
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 3fc0366..1178931 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -74,12 +74,21 @@ struct inet_ehash_bucket {
* users logged onto your box, isn't it nice to know that new data
* ports are created in O(1) time? I thought so. ;-) -DaveM
*/
+#define FASTREUSEPORT_ANY 1
+#define FASTREUSEPORT_STRICT 2
+
struct inet_bind_bucket {
possible_net_t ib_net;
unsigned short port;
signed char fastreuse;
signed char fastreuseport;
kuid_t fastuid;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr fast_v6_rcv_saddr;
+#endif
+ __be32 fast_rcv_saddr;
+ unsigned short fast_sk_family;
+ bool fast_ipv6_only;
struct hlist_node node;
struct hlist_head owners;
};
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index bbe2892..096a085 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -38,20 +38,21 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
* IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
* and 0.0.0.0 equals to 0.0.0.0 only
*/
-static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
+ const struct in6_addr *sk2_rcv_saddr6,
+ __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
+ bool sk1_ipv6only, bool sk2_ipv6only,
bool match_wildcard)
{
- const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- int sk2_ipv6only = inet_v6_ipv6only(sk2);
- int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+ int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
/* if both are mapped, treat as IPv4 */
if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
if (!sk2_ipv6only) {
- if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
+ if (sk1_rcv_saddr == sk2_rcv_saddr)
return 1;
- if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
+ if (!sk1_rcv_saddr || !sk2_rcv_saddr)
return match_wildcard;
}
return 0;
@@ -65,11 +66,11 @@ static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
return 1;
if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
- !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
+ !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
return 1;
if (sk2_rcv_saddr6 &&
- ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
+ ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
return 1;
return 0;
@@ -80,13 +81,13 @@ static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
* match_wildcard == false: addresses must be exactly the same, i.e.
* 0.0.0.0 only equals to 0.0.0.0
*/
-static int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard)
+static int ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
+ bool sk2_ipv6only, bool match_wildcard)
{
- if (!ipv6_only_sock(sk2)) {
- if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
+ if (!sk2_ipv6only) {
+ if (sk1_rcv_saddr == sk2_rcv_saddr)
return 1;
- if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
+ if (!sk1_rcv_saddr || !sk2_rcv_saddr)
return match_wildcard;
}
return 0;
@@ -97,9 +98,16 @@ int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
{
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
- return ipv6_rcv_saddr_equal(sk, sk2, match_wildcard);
+ return ipv6_rcv_saddr_equal(&sk->sk_v6_rcv_saddr,
+ &sk2->sk_v6_rcv_saddr,
+ sk->sk_rcv_saddr,
+ sk2->sk_rcv_saddr,
+ ipv6_only_sock(sk),
+ ipv6_only_sock(sk2),
+ match_wildcard);
#endif
- return ipv4_rcv_saddr_equal(sk, sk2, match_wildcard);
+ return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr,
+ ipv6_only_sock(sk2), match_wildcard);
}
EXPORT_SYMBOL(inet_rcv_saddr_equal);
@@ -234,6 +242,39 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *
return head;
}
+static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
+ struct sock *sk)
+{
+ kuid_t uid = sock_i_uid(sk);
+
+ if (tb->fastreuseport <= 0)
+ return 0;
+ if (!sk->sk_reuseport)
+ return 0;
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ return 0;
+ if (!uid_eq(tb->fastuid, uid))
+ return 0;
+ /* We only need to check the rcv_saddr if this tb was once marked
+ * without fastreuseport and then was reset, as we can only know that
+ * the fast_*rcv_saddr doesn't have any conflicts with the socks on the
+ * owners list.
+ */
+ if (tb->fastreuseport == FASTREUSEPORT_ANY)
+ return 1;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (tb->fast_sk_family == AF_INET6)
+ return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
+ &sk->sk_v6_rcv_saddr,
+ tb->fast_rcv_saddr,
+ sk->sk_rcv_saddr,
+ tb->fast_ipv6_only,
+ ipv6_only_sock(sk), true);
+#endif
+ return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr,
+ ipv6_only_sock(sk), true);
+}
+
/* Obtain a reference to a local port for the given sock,
* if snum is zero it means select any available local port.
* We try to allocate an odd port (and leave even ports for connect())
@@ -273,9 +314,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
goto success;
if ((tb->fastreuse > 0 && reuse) ||
- (tb->fastreuseport > 0 &&
- !rcu_access_pointer(sk->sk_reuseport_cb) &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid)))
+ sk_reuseport_match(tb, sk))
goto success;
if (inet_csk_bind_conflict(sk, tb, true, true))
goto fail_unlock;
@@ -284,16 +323,43 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
if (!hlist_empty(&tb->owners)) {
tb->fastreuse = reuse;
if (sk->sk_reuseport) {
- tb->fastreuseport = 1;
+ tb->fastreuseport = FASTREUSEPORT_ANY;
tb->fastuid = uid;
+ tb->fast_rcv_saddr = sk->sk_rcv_saddr;
+ tb->fast_ipv6_only = ipv6_only_sock(sk);
+#if IS_ENABLED(CONFIG_IPV6)
+ tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+#endif
} else {
tb->fastreuseport = 0;
}
} else {
if (!reuse)
tb->fastreuse = 0;
- if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
+ if (sk->sk_reuseport) {
+ /* We didn't match or we don't have fastreuseport set on
+ * the tb, but we have sk_reuseport set on this socket
+ * and we know that there are no bind conflicts with
+ * this socket in this tb, so reset our tb's reuseport
+ * settings so that any subsequent sockets that match
+ * our current socket will be put on the fast path.
+ *
+ * If we reset we need to set FASTREUSEPORT_STRICT so we
+ * do extra checking for all subsequent sk_reuseport
+ * socks.
+ */
+ if (!sk_reuseport_match(tb, sk)) {
+ tb->fastreuseport = FASTREUSEPORT_STRICT;
+ tb->fastuid = uid;
+ tb->fast_rcv_saddr = sk->sk_rcv_saddr;
+ tb->fast_ipv6_only = ipv6_only_sock(sk);
+#if IS_ENABLED(CONFIG_IPV6)
+ tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+#endif
+ }
+ } else {
tb->fastreuseport = 0;
+ }
}
if (!inet_csk(sk)->icsk_bind_hash)
inet_bind_hash(sk, tb, port);
--
2.5.5
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 1/6 net-next] inet: collapse ipv4/v6 rcv_saddr_equal functions into one
@ 2017-01-12 17:41 Craig Gallek
2017-01-12 18:13 ` Josef Bacik
0 siblings, 1 reply; 13+ messages in thread
From: Craig Gallek @ 2017-01-12 17:41 UTC (permalink / raw)
To: Josef Bacik
Cc: David Miller, Hannes Frederic Sowa, Eric Dumazet, Tom Herbert,
netdev, kernel-team
On Wed, Jan 11, 2017 at 3:19 PM, Josef Bacik <jbacik@fb.com> wrote:
> +int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
> + bool match_wildcard)
> +{
> +#if IS_ENABLED(CONFIG_IPV6)
> + if (sk->sk_family == AF_INET6)
Still wrapping my head around this, so take it with a grain of salt,
but it's not obvious to me that sk and sk2 are guaranteed to have the
same family here (or if it even matters). Especially in the context
of the next patch which removes the bind_conflict callback... Does
this need to be an OR check for the family of either socket? Or is it
safe as-is because the first socket passed to this function is always
the existing one and the second one is the possible conflict socket?
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/6 net-next] inet: collapse ipv4/v6 rcv_saddr_equal functions into one
2017-01-12 17:41 Craig Gallek
@ 2017-01-12 18:13 ` Josef Bacik
0 siblings, 0 replies; 13+ messages in thread
From: Josef Bacik @ 2017-01-12 18:13 UTC (permalink / raw)
To: Craig Gallek
Cc: David Miller, Hannes Frederic Sowa, Eric Dumazet, Tom Herbert,
netdev, kernel-team
On Thu, Jan 12, 2017 at 12:41 PM, Craig Gallek <kraigatgoog@gmail.com>
wrote:
> On Wed, Jan 11, 2017 at 3:19 PM, Josef Bacik <jbacik@fb.com> wrote:
>> +int inet_rcv_saddr_equal(const struct sock *sk, const struct sock
>> *sk2,
>> + bool match_wildcard)
>> +{
>> +#if IS_ENABLED(CONFIG_IPV6)
>> + if (sk->sk_family == AF_INET6)
> Still wrapping my head around this, so take it with a grain of salt,
> but it's not obvious to me that sk and sk2 are guaranteed to have the
> same family here (or if it even matters). Especially in the context
> of the next patch which removes the bind_conflict callback... Does
> this need to be an OR check for the family of either socket? Or is it
> safe as-is because the first socket passed to this function is always
> the existing one and the second one is the possible conflict socket?
It's safe as is, all we care is that sk1 is the INET6 socket. In the
compare function we use inet6_rcv_saddr(sk2) which will return NULL if
it isn't INET6 and then the function handles that case appropriately.
This stuff is subtle so it's easy to get confused, I always made sure
to run it on our production boxes to make sure I didn't break something
;). Thanks,
Josef
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/6 net-next] inet: drop ->bind_conflict
2017-01-11 20:22 ` [PATCH 2/6 net-next] inet: drop ->bind_conflict Josef Bacik
` (3 preceding siblings ...)
2017-01-11 20:22 ` [PATCH 6/6 net-next] inet: reset tb->fastreuseport when adding a reuseport sk Josef Bacik
@ 2017-01-12 19:56 ` David Miller
2017-01-12 21:04 ` Josef Bacik
4 siblings, 1 reply; 13+ messages in thread
From: David Miller @ 2017-01-12 19:56 UTC (permalink / raw)
To: jbacik; +Cc: hannes, kraigatgoog, eric.dumazet, tom, netdev, kernel-team
From: Josef Bacik <jbacik@fb.com>
Date: Wed, 11 Jan 2017 15:22:40 -0500
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 56d756e..dc07734 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -63,6 +63,7 @@
> #include <linux/times.h>
> #include <linux/slab.h>
>
> +#include <net/addrconf.h>
> #include <net/net_namespace.h>
> #include <net/icmp.h>
> #include <net/inet_hashtables.h>
I don't see what this has to do with this change.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/6 net-next] inet: drop ->bind_conflict
2017-01-12 19:56 ` [PATCH 2/6 net-next] inet: drop ->bind_conflict David Miller
@ 2017-01-12 21:04 ` Josef Bacik
0 siblings, 0 replies; 13+ messages in thread
From: Josef Bacik @ 2017-01-12 21:04 UTC (permalink / raw)
To: David Miller; +Cc: hannes, kraigatgoog, eric.dumazet, tom, netdev, kernel-team
On Thu, Jan 12, 2017 at 2:56 PM, David Miller <davem@davemloft.net>
wrote:
> From: Josef Bacik <jbacik@fb.com>
> Date: Wed, 11 Jan 2017 15:22:40 -0500
>
>> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
>> index 56d756e..dc07734 100644
>> --- a/net/ipv4/tcp_ipv4.c
>> +++ b/net/ipv4/tcp_ipv4.c
>> @@ -63,6 +63,7 @@
>> #include <linux/times.h>
>> #include <linux/slab.h>
>>
>> +#include <net/addrconf.h>
>> #include <net/net_namespace.h>
>> #include <net/icmp.h>
>> #include <net/inet_hashtables.h>
>
> I don't see what this has to do with this change.
Ugh sorry, that's a left over from when I had the protocol specific
callback for the saddr_equal stuff, I'll fix that up. Thanks,
Josef
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 1/6 net-next] inet: collapse ipv4/v6 rcv_saddr_equal functions into one
2017-01-17 15:51 [PATCH 0/6 net-next][V4] Rework inet_csk_get_port Josef Bacik
@ 2017-01-17 15:51 ` Josef Bacik
0 siblings, 0 replies; 13+ messages in thread
From: Josef Bacik @ 2017-01-17 15:51 UTC (permalink / raw)
To: davem, hannes, kraigatgoog, eric.dumazet, tom, netdev,
kernel-team
We pass these per-protocol equal functions around in various places, but
we can just have one function that checks the sk->sk_family and then do
the right comparison function. I've also changed the ipv4 version to
not cast to inet_sock since it is unneeded.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
include/net/addrconf.h | 4 +--
include/net/inet_hashtables.h | 5 +--
include/net/udp.h | 1 -
net/ipv4/inet_connection_sock.c | 72 ++++++++++++++++++++++++++++++++++++++++
net/ipv4/inet_hashtables.c | 16 +++------
net/ipv4/udp.c | 58 +++++++-------------------------
net/ipv6/inet6_connection_sock.c | 4 +--
net/ipv6/inet6_hashtables.c | 46 +------------------------
net/ipv6/udp.c | 2 +-
9 files changed, 95 insertions(+), 113 deletions(-)
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 8f998af..17c6fd8 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -88,9 +88,7 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
u32 banned_flags);
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
-int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard);
-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
bool match_wildcard);
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 0574493..756ed16 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -203,10 +203,7 @@ void inet_hashinfo_init(struct inet_hashinfo *h);
bool inet_ehash_insert(struct sock *sk, struct sock *osk);
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
-int __inet_hash(struct sock *sk, struct sock *osk,
- int (*saddr_same)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard));
+int __inet_hash(struct sock *sk, struct sock *osk);
int inet_hash(struct sock *sk);
void inet_unhash(struct sock *sk);
diff --git a/include/net/udp.h b/include/net/udp.h
index 1661791..c9d8b8e 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -204,7 +204,6 @@ static inline void udp_lib_close(struct sock *sk, long timeout)
}
int udp_lib_get_port(struct sock *sk, unsigned short snum,
- int (*)(const struct sock *, const struct sock *, bool),
unsigned int hash2_nulladdr);
u32 udp_flow_hashrnd(void);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 19ea045..ba597cb 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -31,6 +31,78 @@ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
EXPORT_SYMBOL(inet_csk_timer_bug_msg);
#endif
+#if IS_ENABLED(CONFIG_IPV6)
+/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
+ * only, and any IPv4 addresses if not IPv6 only
+ * match_wildcard == false: addresses must be exactly the same, i.e.
+ * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
+ * and 0.0.0.0 equals to 0.0.0.0 only
+ */
+static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
+{
+ const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
+ int sk2_ipv6only = inet_v6_ipv6only(sk2);
+ int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+ int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
+
+ /* if both are mapped, treat as IPv4 */
+ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
+ if (!sk2_ipv6only) {
+ if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
+ return 1;
+ if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
+ return match_wildcard;
+ }
+ return 0;
+ }
+
+ if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
+ return 1;
+
+ if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
+ !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
+ !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (sk2_rcv_saddr6 &&
+ ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
+ return 1;
+
+ return 0;
+}
+#endif
+
+/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses
+ * match_wildcard == false: addresses must be exactly the same, i.e.
+ * 0.0.0.0 only equals to 0.0.0.0
+ */
+static int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
+{
+ if (!ipv6_only_sock(sk2)) {
+ if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
+ return 1;
+ if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
+ return match_wildcard;
+ }
+ return 0;
+}
+
+int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ return ipv6_rcv_saddr_equal(sk, sk2, match_wildcard);
+#endif
+ return ipv4_rcv_saddr_equal(sk, sk2, match_wildcard);
+}
+EXPORT_SYMBOL(inet_rcv_saddr_equal);
+
void inet_get_local_port_range(struct net *net, int *low, int *high)
{
unsigned int seq;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ca97835..2ef9b01 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -435,10 +435,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
static int inet_reuseport_add_sock(struct sock *sk,
- struct inet_listen_hashbucket *ilb,
- int (*saddr_same)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard))
+ struct inet_listen_hashbucket *ilb)
{
struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
struct sock *sk2;
@@ -451,7 +448,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
inet_csk(sk2)->icsk_bind_hash == tb &&
sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
- saddr_same(sk, sk2, false))
+ inet_rcv_saddr_equal(sk, sk2, false))
return reuseport_add_sock(sk, sk2);
}
@@ -461,10 +458,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
return 0;
}
-int __inet_hash(struct sock *sk, struct sock *osk,
- int (*saddr_same)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard))
+int __inet_hash(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb;
@@ -479,7 +473,7 @@ int __inet_hash(struct sock *sk, struct sock *osk,
spin_lock(&ilb->lock);
if (sk->sk_reuseport) {
- err = inet_reuseport_add_sock(sk, ilb, saddr_same);
+ err = inet_reuseport_add_sock(sk, ilb);
if (err)
goto unlock;
}
@@ -503,7 +497,7 @@ int inet_hash(struct sock *sk)
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
- err = __inet_hash(sk, NULL, ipv4_rcv_saddr_equal);
+ err = __inet_hash(sk, NULL);
local_bh_enable();
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4318d72..d6dddcf 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -137,11 +137,7 @@ EXPORT_SYMBOL(udp_memory_allocated);
static int udp_lib_lport_inuse(struct net *net, __u16 num,
const struct udp_hslot *hslot,
unsigned long *bitmap,
- struct sock *sk,
- int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard),
- unsigned int log)
+ struct sock *sk, unsigned int log)
{
struct sock *sk2;
kuid_t uid = sock_i_uid(sk);
@@ -153,7 +149,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
(!sk2->sk_reuse || !sk->sk_reuse) &&
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- saddr_comp(sk, sk2, true)) {
+ inet_rcv_saddr_equal(sk, sk2, true)) {
if (sk2->sk_reuseport && sk->sk_reuseport &&
!rcu_access_pointer(sk->sk_reuseport_cb) &&
uid_eq(uid, sock_i_uid(sk2))) {
@@ -176,10 +172,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
*/
static int udp_lib_lport_inuse2(struct net *net, __u16 num,
struct udp_hslot *hslot2,
- struct sock *sk,
- int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard))
+ struct sock *sk)
{
struct sock *sk2;
kuid_t uid = sock_i_uid(sk);
@@ -193,7 +186,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
(!sk2->sk_reuse || !sk->sk_reuse) &&
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- saddr_comp(sk, sk2, true)) {
+ inet_rcv_saddr_equal(sk, sk2, true)) {
if (sk2->sk_reuseport && sk->sk_reuseport &&
!rcu_access_pointer(sk->sk_reuseport_cb) &&
uid_eq(uid, sock_i_uid(sk2))) {
@@ -208,10 +201,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
return res;
}
-static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
- int (*saddr_same)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard))
+static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
{
struct net *net = sock_net(sk);
kuid_t uid = sock_i_uid(sk);
@@ -225,7 +215,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
(udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) &&
(sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
- (*saddr_same)(sk, sk2, false)) {
+ inet_rcv_saddr_equal(sk, sk2, false)) {
return reuseport_add_sock(sk, sk2);
}
}
@@ -241,14 +231,10 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
*
* @sk: socket struct in question
* @snum: port number to look up
- * @saddr_comp: AF-dependent comparison of bound local IP addresses
* @hash2_nulladdr: AF-dependent hash value in secondary hash chains,
* with NULL address
*/
int udp_lib_get_port(struct sock *sk, unsigned short snum,
- int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2,
- bool match_wildcard),
unsigned int hash2_nulladdr)
{
struct udp_hslot *hslot, *hslot2;
@@ -277,7 +263,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
bitmap_zero(bitmap, PORTS_PER_CHAIN);
spin_lock_bh(&hslot->lock);
udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
- saddr_comp, udptable->log);
+ udptable->log);
snum = first;
/*
@@ -310,12 +296,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
if (hslot->count < hslot2->count)
goto scan_primary_hash;
- exist = udp_lib_lport_inuse2(net, snum, hslot2,
- sk, saddr_comp);
+ exist = udp_lib_lport_inuse2(net, snum, hslot2, sk);
if (!exist && (hash2_nulladdr != slot2)) {
hslot2 = udp_hashslot2(udptable, hash2_nulladdr);
exist = udp_lib_lport_inuse2(net, snum, hslot2,
- sk, saddr_comp);
+ sk);
}
if (exist)
goto fail_unlock;
@@ -323,8 +308,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
goto found;
}
scan_primary_hash:
- if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,
- saddr_comp, 0))
+ if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, 0))
goto fail_unlock;
}
found:
@@ -333,7 +317,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
udp_sk(sk)->udp_portaddr_hash ^= snum;
if (sk_unhashed(sk)) {
if (sk->sk_reuseport &&
- udp_reuseport_add_sock(sk, hslot, saddr_comp)) {
+ udp_reuseport_add_sock(sk, hslot)) {
inet_sk(sk)->inet_num = 0;
udp_sk(sk)->udp_port_hash = 0;
udp_sk(sk)->udp_portaddr_hash ^= snum;
@@ -365,24 +349,6 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
}
EXPORT_SYMBOL(udp_lib_get_port);
-/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses
- * match_wildcard == false: addresses must be exactly the same, i.e.
- * 0.0.0.0 only equals to 0.0.0.0
- */
-int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
- bool match_wildcard)
-{
- struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
-
- if (!ipv6_only_sock(sk2)) {
- if (inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)
- return 1;
- if (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr)
- return match_wildcard;
- }
- return 0;
-}
-
static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
unsigned int port)
{
@@ -398,7 +364,7 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
- return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
+ return udp_lib_get_port(sk, snum, hash2_nulladdr);
}
static int compute_score(struct sock *sk, struct net *net,
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 7396e75..55ee2ea 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -54,12 +54,12 @@ int inet6_csk_bind_conflict(const struct sock *sk,
(sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid,
sock_i_uid((struct sock *)sk2))))) {
- if (ipv6_rcv_saddr_equal(sk, sk2, true))
+ if (inet_rcv_saddr_equal(sk, sk2, true))
break;
}
if (!relax && reuse && sk2->sk_reuse &&
sk2->sk_state != TCP_LISTEN &&
- ipv6_rcv_saddr_equal(sk, sk2, true))
+ inet_rcv_saddr_equal(sk, sk2, true))
break;
}
}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 02761c9..d090091 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -268,54 +268,10 @@ int inet6_hash(struct sock *sk)
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
- err = __inet_hash(sk, NULL, ipv6_rcv_saddr_equal);
+ err = __inet_hash(sk, NULL);
local_bh_enable();
}
return err;
}
EXPORT_SYMBOL_GPL(inet6_hash);
-
-/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
- * only, and any IPv4 addresses if not IPv6 only
- * match_wildcard == false: addresses must be exactly the same, i.e.
- * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
- * and 0.0.0.0 equals to 0.0.0.0 only
- */
-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard)
-{
- const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- int sk2_ipv6only = inet_v6_ipv6only(sk2);
- int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
- int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
-
- /* if both are mapped, treat as IPv4 */
- if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
- if (!sk2_ipv6only) {
- if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
- return 1;
- if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
- return match_wildcard;
- }
- return 0;
- }
-
- if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
- return 1;
-
- if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
- !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
- return 1;
-
- if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
- !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
- return 1;
-
- if (sk2_rcv_saddr6 &&
- ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
- return 1;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4d5c4ee..05d6932 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -103,7 +103,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
- return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
+ return udp_lib_get_port(sk, snum, hash2_nulladdr);
}
static void udp_v6_rehash(struct sock *sk)
--
2.9.3
^ permalink raw reply related [flat|nested] 13+ messages in thread
end of thread, other threads:[~2017-01-17 15:51 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-01-11 20:06 [PATCH 0/6 net-next][V3] Rework inet_csk_get_port Josef Bacik
2017-01-11 20:19 ` [PATCH 1/6 net-next] inet: collapse ipv4/v6 rcv_saddr_equal functions into one Josef Bacik
2017-01-11 20:22 ` [PATCH 2/6 net-next] inet: drop ->bind_conflict Josef Bacik
2017-01-11 20:22 ` [PATCH 3/6 net-next] inet: kill smallest_size and smallest_port Josef Bacik
2017-01-11 20:22 ` [PATCH 4/6 net-next] inet: don't check for bind conflicts twice when searching for a port Josef Bacik
2017-01-11 20:22 ` [PATCH 5/6 net-next] inet: split inet_csk_get_port into two functions Josef Bacik
2017-01-11 20:22 ` [PATCH 6/6 net-next] inet: reset tb->fastreuseport when adding a reuseport sk Josef Bacik
2017-01-12 19:56 ` [PATCH 2/6 net-next] inet: drop ->bind_conflict David Miller
2017-01-12 21:04 ` Josef Bacik
-- strict thread matches above, loose matches on Subject: below --
2017-01-17 15:51 [PATCH 0/6 net-next][V4] Rework inet_csk_get_port Josef Bacik
2017-01-17 15:51 ` [PATCH 1/6 net-next] inet: collapse ipv4/v6 rcv_saddr_equal functions into one Josef Bacik
2017-01-12 17:41 Craig Gallek
2017-01-12 18:13 ` Josef Bacik
2016-12-22 21:26 [PATCH 0/6 net-next][V2] Rework inet_csk_get_port Josef Bacik
2016-12-22 21:26 ` [PATCH 1/6 net-next] inet: collapse ipv4/v6 rcv_saddr_equal functions into one Josef Bacik
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).