* [RFC PATCH 01/10] net: Add sk_lookup struct and helper
2017-07-25 15:38 [RFC PATCH 00/10] net: l3mdev: Support for sockets bound to enslaved device David Ahern
@ 2017-07-25 15:38 ` David Ahern
2017-07-25 15:38 ` [RFC PATCH 02/10] net: ipv4: Convert udp socket lookups to new struct David Ahern
` (8 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: David Ahern @ 2017-07-25 15:38 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
Consolidate the socket lookup args into a struct.
Add helper that compares sk_bound_dev_if for a socket to the lookup
parameters.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/sock.h | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/include/net/sock.h b/include/net/sock.h
index 7c0632c7e870..a2db5fd30192 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -491,6 +491,44 @@ enum sk_pacing {
#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk)))
#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr)
+/* used for socket lookups */
+struct sk_lookup {
+ union {
+ const struct in6_addr *ipv6;
+ __be32 ipv4;
+ } saddr; /* compared with the socket's remote address */
+ union {
+ const struct in6_addr *ipv6;
+ __be32 ipv4;
+ } daddr; /* compared with the socket's local (receive) address */
+
+ __be16 sport; /* compared with the socket's remote port */
+ __be16 dport; /* port being looked up, network byte order */
+ unsigned short hnum; /* dport in host byte order */
+
+ int dif; /* device index compared with sk_bound_dev_if */
+ bool exact_dif; /* when true, sk_bound_dev_if must equal dif */
+};
+
+/* Compare sk_bound_dev_if to socket lookup dif
+ * Returns:
+ * -1 socket is bound to a device (or exact dif is required) and
+ *    sk_bound_dev_if does not match dif
+ *  0 socket is not bound to a device and exact dif is not required
+ *  1 sk_bound_dev_if matches dif
+ */
+static inline int sk_lookup_device_cmp(const struct sock *sk,
+				       const struct sk_lookup *params)
+{
+	/* unbound socket is a wildcard unless exact_dif (l3mdev case) is set */
+	if (!sk->sk_bound_dev_if && !params->exact_dif)
+		return 0;
+
+	/* a socket bound to a different device must never match the lookup;
+	 * returning 0 here would let it be scored as a wildcard candidate */
+	return sk->sk_bound_dev_if == params->dif ? 1 : -1;
+}
+
/*
* SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK
* or not whether his port will be reused by someone else. SK_FORCE_REUSE
--
2.1.4
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 02/10] net: ipv4: Convert udp socket lookups to new struct
2017-07-25 15:38 [RFC PATCH 00/10] net: l3mdev: Support for sockets bound to enslaved device David Ahern
2017-07-25 15:38 ` [RFC PATCH 01/10] net: Add sk_lookup struct and helper David Ahern
@ 2017-07-25 15:38 ` David Ahern
2017-07-25 15:38 ` [RFC PATCH 03/10] net: ipv4: Convert inet " David Ahern
` (7 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: David Ahern @ 2017-07-25 15:38 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
Convert udp4_lib_lookup and __udp4_lib_lookup to use the new sk_lookup
struct.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/udp.h | 6 +-
net/ipv4/netfilter/nf_socket_ipv4.c | 11 ++-
net/ipv4/udp.c | 170 +++++++++++++++++++-----------------
net/ipv4/udp_diag.c | 51 +++++++----
net/netfilter/xt_TPROXY.c | 11 ++-
5 files changed, 144 insertions(+), 105 deletions(-)
diff --git a/include/net/udp.h b/include/net/udp.h
index 972ce4baab6b..5e0ff095dc6d 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -283,10 +283,8 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
int udp_lib_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen,
int (*push_pending_frames)(struct sock *));
-struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
- __be32 daddr, __be16 dport, int dif);
-struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
- __be32 daddr, __be16 dport, int dif,
+struct sock *udp4_lib_lookup(struct net *net, struct sk_lookup *params);
+struct sock *__udp4_lib_lookup(struct net *net, struct sk_lookup *params,
struct udp_table *tbl, struct sk_buff *skb);
struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport);
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
index e9293bdebba0..121767b36763 100644
--- a/net/ipv4/netfilter/nf_socket_ipv4.c
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -81,14 +81,21 @@ nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
const __be16 sport, const __be16 dport,
const struct net_device *in)
{
+ struct sk_lookup params = {
+ .saddr.ipv4 = saddr,
+ .daddr.ipv4 = daddr,
+ .sport = sport,
+ .dport = dport,
+ .dif = in->ifindex,
+ };
+
switch (protocol) {
case IPPROTO_TCP:
return inet_lookup(net, &tcp_hashinfo, skb, doff,
saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
- return udp4_lib_lookup(net, saddr, sport, daddr, dport,
- in->ifindex);
+ return udp4_lib_lookup(net, &params);
}
return NULL;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b057653ceca9..132a8f070d16 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -379,15 +379,13 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
}
static int compute_score(struct sock *sk, struct net *net,
- __be32 saddr, __be16 sport,
- __be32 daddr, unsigned short hnum, int dif,
- bool exact_dif)
+ const struct sk_lookup *params)
{
- int score;
struct inet_sock *inet;
+ int score, rc;
if (!net_eq(sock_net(sk), net) ||
- udp_sk(sk)->udp_port_hash != hnum ||
+ udp_sk(sk)->udp_port_hash != params->hnum ||
ipv6_only_sock(sk))
return -1;
@@ -395,28 +393,28 @@ static int compute_score(struct sock *sk, struct net *net,
inet = inet_sk(sk);
if (inet->inet_rcv_saddr) {
- if (inet->inet_rcv_saddr != daddr)
+ if (inet->inet_rcv_saddr != params->daddr.ipv4)
return -1;
score += 4;
}
if (inet->inet_daddr) {
- if (inet->inet_daddr != saddr)
+ if (inet->inet_daddr != params->saddr.ipv4)
return -1;
score += 4;
}
if (inet->inet_dport) {
- if (inet->inet_dport != sport)
+ if (inet->inet_dport != params->sport)
return -1;
score += 4;
}
- if (sk->sk_bound_dev_if || exact_dif) {
- if (sk->sk_bound_dev_if != dif)
- return -1;
+ rc = sk_lookup_device_cmp(sk, params);
+ if (rc < 0)
+ return -1;
+ if (rc > 0)
score += 4;
- }
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
return score;
@@ -436,10 +434,9 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(struct net *net,
- __be32 saddr, __be16 sport,
- __be32 daddr, unsigned int hnum, int dif, bool exact_dif,
- struct udp_hslot *hslot2,
- struct sk_buff *skb)
+ const struct sk_lookup *params,
+ struct udp_hslot *hslot2,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
int score, badness, matches = 0, reuseport = 0;
@@ -448,13 +445,14 @@ static struct sock *udp4_lib_lookup2(struct net *net,
result = NULL;
badness = 0;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
- score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, exact_dif);
+ score = compute_score(sk, net, params);
if (score > badness) {
reuseport = sk->sk_reuseport;
if (reuseport) {
- hash = udp_ehashfn(net, daddr, hnum,
- saddr, sport);
+ hash = udp_ehashfn(net, params->daddr.ipv4,
+ params->hnum,
+ params->saddr.ipv4,
+ params->sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
if (result)
@@ -476,28 +474,27 @@ static struct sock *udp4_lib_lookup2(struct net *net,
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
-struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
- __be16 sport, __be32 daddr, __be16 dport,
- int dif, struct udp_table *udptable, struct sk_buff *skb)
+struct sock *__udp4_lib_lookup(struct net *net, struct sk_lookup *params,
+ struct udp_table *udptable, struct sk_buff *skb)
{
struct sock *sk, *result;
- unsigned short hnum = ntohs(dport);
+ unsigned short hnum = ntohs(params->dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
- bool exact_dif = udp_lib_exact_dif_match(net, skb);
int score, badness, matches = 0, reuseport = 0;
u32 hash = 0;
+ params->hnum = hnum;
+ params->exact_dif = udp_lib_exact_dif_match(net, skb);
+
if (hslot->count > 10) {
- hash2 = udp4_portaddr_hash(net, daddr, hnum);
+ hash2 = udp4_portaddr_hash(net, params->daddr.ipv4, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
if (hslot->count < hslot2->count)
goto begin;
- result = udp4_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif,
- exact_dif, hslot2, skb);
+ result = udp4_lib_lookup2(net, params, hslot2, skb);
if (!result) {
unsigned int old_slot2 = slot2;
hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
@@ -510,9 +507,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
if (hslot->count < hslot2->count)
goto begin;
- result = udp4_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif,
- exact_dif, hslot2, skb);
+ result = udp4_lib_lookup2(net, params, hslot2, skb);
}
return result;
}
@@ -520,13 +515,14 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = NULL;
badness = 0;
sk_for_each_rcu(sk, &hslot->head) {
- score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, exact_dif);
+ score = compute_score(sk, net, params);
if (score > badness) {
reuseport = sk->sk_reuseport;
if (reuseport) {
- hash = udp_ehashfn(net, daddr, hnum,
- saddr, sport);
+ hash = udp_ehashfn(net, params->daddr.ipv4,
+ params->hnum,
+ params->saddr.ipv4,
+ params->sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
if (result)
@@ -551,10 +547,16 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
struct udp_table *udptable)
{
const struct iphdr *iph = ip_hdr(skb);
+ struct net *net = dev_net(skb->dev);
+ struct sk_lookup params = {
+ .saddr.ipv4 = iph->saddr,
+ .daddr.ipv4 = iph->daddr,
+ .sport = sport,
+ .dport = dport,
+ .dif = inet_iif(skb),
+ };
- return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
- iph->daddr, dport, inet_iif(skb),
- udptable, skb);
+ return __udp4_lib_lookup(net, &params, udptable, skb);
}
struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
@@ -570,13 +572,11 @@ EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb);
#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) || \
IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
-struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
- __be32 daddr, __be16 dport, int dif)
+struct sock *udp4_lib_lookup(struct net *net, struct sk_lookup *params)
{
struct sock *sk;
- sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
- dif, &udp_table, NULL);
+ sk = __udp4_lib_lookup(net, params, &udp_table, NULL);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
@@ -585,21 +585,21 @@ EXPORT_SYMBOL_GPL(udp4_lib_lookup);
#endif
static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
- __be16 loc_port, __be32 loc_addr,
- __be16 rmt_port, __be32 rmt_addr,
- int dif, unsigned short hnum)
+ const struct sk_lookup *params)
{
struct inet_sock *inet = inet_sk(sk);
+ __be32 loc_addr = params->daddr.ipv4;
+ __be32 rmt_addr = params->saddr.ipv4;
if (!net_eq(sock_net(sk), net) ||
- udp_sk(sk)->udp_port_hash != hnum ||
+ udp_sk(sk)->udp_port_hash != params->hnum ||
(inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
- (inet->inet_dport != rmt_port && inet->inet_dport) ||
+ (inet->inet_dport != params->sport && inet->inet_dport) ||
(inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
ipv6_only_sock(sk) ||
- (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ (sk->sk_bound_dev_if && sk->sk_bound_dev_if != params->dif))
return false;
- if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif))
+ if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, params->dif))
return false;
return true;
}
@@ -626,10 +626,15 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
int harderr;
int err;
struct net *net = dev_net(skb->dev);
-
- sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
- iph->saddr, uh->source, skb->dev->ifindex, udptable,
- NULL);
+ struct sk_lookup params = {
+ .saddr.ipv4 = iph->daddr,
+ .daddr.ipv4 = iph->saddr,
+ .sport = uh->dest,
+ .dport = uh->source,
+ .dif = skb->dev->ifindex,
+ };
+
+ sk = __udp4_lib_lookup(net, &params, udptable, NULL);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return; /* No socket for error */
@@ -1956,9 +1961,16 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
unsigned int offset = offsetof(typeof(*sk), sk_node);
- int dif = skb->dev->ifindex;
struct hlist_node *node;
struct sk_buff *nskb;
+ struct sk_lookup params = {
+ .saddr.ipv4 = saddr,
+ .daddr.ipv4 = daddr,
+ .sport = uh->source,
+ .dport = uh->dest,
+ .hnum = hnum,
+ .dif = skb->dev->ifindex,
+ };
if (use_hash2) {
hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
@@ -1970,8 +1982,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
}
sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
- if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr,
- uh->source, saddr, dif, hnum))
+ if (!__udp_is_mcast_sock(net, sk, &params))
continue;
if (!first) {
@@ -2159,13 +2170,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
* If more than one socket found returns NULL
*/
static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
- __be16 loc_port, __be32 loc_addr,
- __be16 rmt_port, __be32 rmt_addr,
- int dif)
+ const struct sk_lookup *params)
{
struct sock *sk, *result;
- unsigned short hnum = ntohs(loc_port);
- unsigned int slot = udp_hashfn(net, hnum, udp_table.mask);
+ unsigned int slot = udp_hashfn(net, params->hnum, udp_table.mask);
struct udp_hslot *hslot = &udp_table.hash[slot];
/* Do not bother scanning a too big list */
@@ -2174,8 +2182,7 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
result = NULL;
sk_for_each_rcu(sk, &hslot->head) {
- if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr,
- rmt_port, rmt_addr, dif, hnum)) {
+ if (__udp_is_mcast_sock(net, sk, params)) {
if (result)
return NULL;
result = sk;
@@ -2190,21 +2197,20 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
* if the first socket is an exact match and if not move on.
*/
static struct sock *__udp4_lib_demux_lookup(struct net *net,
- __be16 loc_port, __be32 loc_addr,
- __be16 rmt_port, __be32 rmt_addr,
- int dif)
+ const struct sk_lookup *params)
{
- unsigned short hnum = ntohs(loc_port);
- unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
+ unsigned int hash2 = udp4_portaddr_hash(net, params->daddr.ipv4,
+ params->hnum);
unsigned int slot2 = hash2 & udp_table.mask;
struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
- INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
- const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
+ INET_ADDR_COOKIE(acookie, params->saddr.ipv4, params->daddr.ipv4);
+ const __portpair ports = INET_COMBINED_PORTS(params->sport,
+ params->hnum);
struct sock *sk;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
- if (INET_MATCH(sk, net, acookie, rmt_addr,
- loc_addr, ports, dif))
+ if (INET_MATCH(sk, net, acookie, params->saddr.ipv4,
+ params->daddr.ipv4, ports, params->dif))
return sk;
/* Only check first socket in chain */
break;
@@ -2215,11 +2221,13 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
void udp_v4_early_demux(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
+ struct sk_lookup params = {
+ .dif = skb->dev->ifindex,
+ };
const struct iphdr *iph;
const struct udphdr *uh;
struct sock *sk = NULL;
struct dst_entry *dst;
- int dif = skb->dev->ifindex;
int ours;
/* validate the packet */
@@ -2228,6 +2236,11 @@ void udp_v4_early_demux(struct sk_buff *skb)
iph = ip_hdr(skb);
uh = udp_hdr(skb);
+ params.saddr.ipv4 = iph->saddr;
+ params.daddr.ipv4 = iph->daddr;
+ params.sport = uh->source;
+ params.dport = uh->dest;
+ params.hnum = ntohs(uh->dest);
if (skb->pkt_type == PACKET_BROADCAST ||
skb->pkt_type == PACKET_MULTICAST) {
@@ -2244,12 +2257,9 @@ void udp_v4_early_demux(struct sk_buff *skb)
return;
}
- sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
- uh->source, iph->saddr, dif);
- } else if (skb->pkt_type == PACKET_HOST) {
- sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
- uh->source, iph->saddr, dif);
- }
+ sk = __udp4_lib_mcast_demux_lookup(net, &params);
+ } else if (skb->pkt_type == PACKET_HOST)
+ sk = __udp4_lib_demux_lookup(net, &params);
if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
return;
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 4515836d2a3a..5e0640877536 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -41,11 +41,17 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
struct net *net = sock_net(in_skb->sk);
rcu_read_lock();
- if (req->sdiag_family == AF_INET)
- sk = __udp4_lib_lookup(net,
- req->id.idiag_src[0], req->id.idiag_sport,
- req->id.idiag_dst[0], req->id.idiag_dport,
- req->id.idiag_if, tbl, NULL);
+ if (req->sdiag_family == AF_INET) {
+ struct sk_lookup params = {
+ .saddr.ipv4 = req->id.idiag_src[0],
+ .daddr.ipv4 = req->id.idiag_dst[0],
+ .sport = req->id.idiag_sport,
+ .dport = req->id.idiag_dport,
+ .dif = req->id.idiag_if,
+ };
+
+ sk = __udp4_lib_lookup(net, &params, tbl, NULL);
+ }
#if IS_ENABLED(CONFIG_IPV6)
else if (req->sdiag_family == AF_INET6)
sk = __udp6_lib_lookup(net,
@@ -178,27 +184,38 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
rcu_read_lock();
- if (req->sdiag_family == AF_INET)
- sk = __udp4_lib_lookup(net,
- req->id.idiag_dst[0], req->id.idiag_dport,
- req->id.idiag_src[0], req->id.idiag_sport,
- req->id.idiag_if, tbl, NULL);
+ if (req->sdiag_family == AF_INET) {
+ struct sk_lookup params = {
+ .saddr.ipv4 = req->id.idiag_dst[0],
+ .daddr.ipv4 = req->id.idiag_src[0],
+ .sport = req->id.idiag_dport,
+ .dport = req->id.idiag_sport,
+ .dif = req->id.idiag_if,
+ };
+
+ sk = __udp4_lib_lookup(net, &params, tbl, NULL);
+ }
#if IS_ENABLED(CONFIG_IPV6)
else if (req->sdiag_family == AF_INET6) {
if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
- ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
- sk = __udp4_lib_lookup(net,
- req->id.idiag_dst[3], req->id.idiag_dport,
- req->id.idiag_src[3], req->id.idiag_sport,
- req->id.idiag_if, tbl, NULL);
-
- else
+ ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) {
+ struct sk_lookup params = {
+ .saddr.ipv4 = req->id.idiag_dst[3],
+ .daddr.ipv4 = req->id.idiag_src[3],
+ .sport = req->id.idiag_dport,
+ .dport = req->id.idiag_sport,
+ .dif = req->id.idiag_if,
+ };
+
+ sk = __udp4_lib_lookup(net, &params, tbl, NULL);
+ } else {
sk = __udp6_lib_lookup(net,
(struct in6_addr *)req->id.idiag_dst,
req->id.idiag_dport,
(struct in6_addr *)req->id.idiag_src,
req->id.idiag_sport,
req->id.idiag_if, tbl, NULL);
+ }
}
#endif
else {
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d767e35fff6b..972a0e40c59a 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -112,6 +112,14 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
+ struct sk_lookup params = {
+ .saddr.ipv4 = saddr,
+ .daddr.ipv4 = daddr,
+ .sport = sport,
+ .dport = dport,
+ .dif = in->ifindex,
+ };
+
struct sock *sk;
struct tcphdr *tcph;
@@ -145,8 +153,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
}
break;
case IPPROTO_UDP:
- sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
- in->ifindex);
+ sk = udp4_lib_lookup(net, &params);
if (sk) {
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
--
2.1.4
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 03/10] net: ipv4: Convert inet socket lookups to new struct
2017-07-25 15:38 [RFC PATCH 00/10] net: l3mdev: Support for sockets bound to enslaved device David Ahern
2017-07-25 15:38 ` [RFC PATCH 01/10] net: Add sk_lookup struct and helper David Ahern
2017-07-25 15:38 ` [RFC PATCH 02/10] net: ipv4: Convert udp socket lookups to new struct David Ahern
@ 2017-07-25 15:38 ` David Ahern
2017-07-25 15:38 ` [RFC PATCH 04/10] net: ipv4: Convert raw sockets to sk_lookup David Ahern
` (6 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: David Ahern @ 2017-07-25 15:38 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
Convert the various inet_lookup functions to use the new sk_lookup
struct.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/inet_hashtables.h | 57 ++++++++++++++--------------------
net/dccp/ipv4.c | 19 +++++++++---
net/ipv4/inet_diag.c | 33 ++++++++++++++------
net/ipv4/inet_hashtables.c | 54 ++++++++++++++++++--------------
net/ipv4/netfilter/nf_socket_ipv4.c | 5 ++-
net/ipv4/tcp_ipv4.c | 62 +++++++++++++++++++++++++++----------
net/ipv4/udp_diag.c | 3 ++
net/netfilter/xt_TPROXY.c | 10 +++---
8 files changed, 145 insertions(+), 98 deletions(-)
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 5026b1f08bb8..fabb8dd8fdb1 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -218,19 +218,16 @@ void inet_unhash(struct sock *sk);
struct sock *__inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
- const __be32 saddr, const __be16 sport,
- const __be32 daddr,
- const unsigned short hnum,
- const int dif);
+ struct sk_lookup *params);
static inline struct sock *inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
- __be32 saddr, __be16 sport,
- __be32 daddr, __be16 dport, int dif)
+ struct sk_lookup *params)
{
- return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
- daddr, ntohs(dport), dif);
+ params->hnum = ntohs(params->dport);
+
+ return __inet_lookup_listener(net, hashinfo, skb, doff, params);
}
/* Socket demux engine toys. */
@@ -286,53 +283,44 @@ static inline struct sock *inet_lookup_listener(struct net *net,
*/
struct sock *__inet_lookup_established(struct net *net,
struct inet_hashinfo *hashinfo,
- const __be32 saddr, const __be16 sport,
- const __be32 daddr, const u16 hnum,
- const int dif);
+ const struct sk_lookup *params);
static inline struct sock *
inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo,
- const __be32 saddr, const __be16 sport,
- const __be32 daddr, const __be16 dport,
- const int dif)
+ struct sk_lookup *params)
{
- return __inet_lookup_established(net, hashinfo, saddr, sport, daddr,
- ntohs(dport), dif);
+ params->hnum = ntohs(params->dport);
+
+ return __inet_lookup_established(net, hashinfo, params);
}
static inline struct sock *__inet_lookup(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
- const __be32 saddr, const __be16 sport,
- const __be32 daddr, const __be16 dport,
- const int dif,
+ struct sk_lookup *params,
bool *refcounted)
{
- u16 hnum = ntohs(dport);
struct sock *sk;
- sk = __inet_lookup_established(net, hashinfo, saddr, sport,
- daddr, hnum, dif);
+ params->hnum = ntohs(params->dport);
+
+ sk = __inet_lookup_established(net, hashinfo, params);
*refcounted = true;
if (sk)
return sk;
*refcounted = false;
- return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
- sport, daddr, hnum, dif);
+ return __inet_lookup_listener(net, hashinfo, skb, doff, params);
}
static inline struct sock *inet_lookup(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
- const __be32 saddr, const __be16 sport,
- const __be32 daddr, const __be16 dport,
- const int dif)
+ struct sk_lookup *params)
{
struct sock *sk;
bool refcounted;
- sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
- dport, dif, &refcounted);
+ sk = __inet_lookup(net, hashinfo, skb, doff, params, &refcounted);
if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -342,21 +330,22 @@ static inline struct sock *inet_lookup(struct net *net,
static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb,
int doff,
- const __be16 sport,
- const __be16 dport,
+ struct sk_lookup *params,
bool *refcounted)
{
struct sock *sk = skb_steal_sock(skb);
const struct iphdr *iph = ip_hdr(skb);
+ /* fill in the device and addresses from the skb; each assignment is
+  * its own statement rather than one comma-chained expression */
+ params->dif = inet_iif(skb);
+ params->saddr.ipv4 = iph->saddr;
+ params->daddr.ipv4 = iph->daddr;
+
*refcounted = true;
if (sk)
return sk;
return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
- doff, iph->saddr, sport,
- iph->daddr, dport, inet_iif(skb),
- refcounted);
+ doff, params, refcounted);
}
u32 inet6_ehashfn(const struct net *net,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index f85d901f4e3f..f98a65fa5f5e 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -244,6 +244,11 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
__u64 seq;
int err;
struct net *net = dev_net(skb->dev);
+ struct sk_lookup params = {
+ .saddr.ipv4 = iph->daddr,
+ .daddr.ipv4 = iph->saddr,
+ .dif = inet_iif(skb),
+ };
/* Only need dccph_dport & dccph_sport which are the first
* 4 bytes in dccp header.
@@ -253,10 +258,11 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
dh = (struct dccp_hdr *)(skb->data + offset);
- sk = __inet_lookup_established(net, &dccp_hashinfo,
- iph->daddr, dh->dccph_dport,
- iph->saddr, ntohs(dh->dccph_sport),
- inet_iif(skb));
+ params.sport = dh->dccph_dport;
+ params.dport = dh->dccph_sport;
+ params.hnum = ntohs(dh->dccph_sport);
+
+ sk = inet_lookup_established(net, &dccp_hashinfo, &params);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
@@ -763,6 +769,7 @@ EXPORT_SYMBOL_GPL(dccp_invalid_packet);
/* this is called when real data arrives */
static int dccp_v4_rcv(struct sk_buff *skb)
{
+ struct sk_lookup params = {};
const struct dccp_hdr *dh;
const struct iphdr *iph;
bool refcounted;
@@ -801,9 +808,11 @@ static int dccp_v4_rcv(struct sk_buff *skb)
DCCP_SKB_CB(skb)->dccpd_ack_seq);
}
+ params.sport = dh->dccph_sport;
+ params.dport = dh->dccph_dport;
lookup:
sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
- dh->dccph_sport, dh->dccph_dport, &refcounted);
+ &params, &refcounted);
if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
"get corresponding socket\n");
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 3828b3a805cd..6c3bc4e408d0 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -396,18 +396,33 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
struct sock *sk;
rcu_read_lock();
- if (req->sdiag_family == AF_INET)
- sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0],
- req->id.idiag_dport, req->id.idiag_src[0],
- req->id.idiag_sport, req->id.idiag_if);
+ if (req->sdiag_family == AF_INET) {
+ struct sk_lookup params = {
+ .saddr.ipv4 = req->id.idiag_dst[0],
+ .daddr.ipv4 = req->id.idiag_src[0],
+ .sport = req->id.idiag_dport,
+ .dport = req->id.idiag_sport,
+ .hnum = ntohs(req->id.idiag_sport),
+ .dif = req->id.idiag_if,
+ };
+
+ sk = inet_lookup(net, hashinfo, NULL, 0, &params);
+ }
#if IS_ENABLED(CONFIG_IPV6)
else if (req->sdiag_family == AF_INET6) {
if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
- ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
- sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3],
- req->id.idiag_dport, req->id.idiag_src[3],
- req->id.idiag_sport, req->id.idiag_if);
- else
+ ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) {
+ struct sk_lookup params = {
+ .saddr.ipv4 = req->id.idiag_dst[3],
+ .daddr.ipv4 = req->id.idiag_src[3],
+ .sport = req->id.idiag_dport,
+ .dport = req->id.idiag_sport,
+ .hnum = ntohs(req->id.idiag_sport),
+ .dif = req->id.idiag_if,
+ };
+
+ sk = inet_lookup(net, hashinfo, NULL, 0, &params);
+ } else
sk = inet6_lookup(net, hashinfo, NULL, 0,
(struct in6_addr *)req->id.idiag_dst,
req->id.idiag_dport,
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2e3389d614d1..e581e200d01d 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -169,26 +169,28 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
EXPORT_SYMBOL_GPL(__inet_inherit_port);
static inline int compute_score(struct sock *sk, struct net *net,
- const unsigned short hnum, const __be32 daddr,
- const int dif, bool exact_dif)
+ const struct sk_lookup *params)
{
int score = -1;
struct inet_sock *inet = inet_sk(sk);
- if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
- !ipv6_only_sock(sk)) {
+ if (net_eq(sock_net(sk), net) &&
+ inet->inet_num == params->hnum &&
+ !ipv6_only_sock(sk)) {
__be32 rcv_saddr = inet->inet_rcv_saddr;
+ int rc;
+
score = sk->sk_family == PF_INET ? 2 : 1;
if (rcv_saddr) {
- if (rcv_saddr != daddr)
+ if (rcv_saddr != params->daddr.ipv4)
return -1;
score += 4;
}
- if (sk->sk_bound_dev_if || exact_dif) {
- if (sk->sk_bound_dev_if != dif)
- return -1;
+ rc = sk_lookup_device_cmp(sk, params);
+ if (rc < 0)
+ return -1;
+ if (rc > 0)
score += 4;
- }
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
}
@@ -206,24 +208,25 @@ static inline int compute_score(struct sock *sk, struct net *net,
struct sock *__inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
- const __be32 saddr, __be16 sport,
- const __be32 daddr, const unsigned short hnum,
- const int dif)
+ struct sk_lookup *params)
{
- unsigned int hash = inet_lhashfn(net, hnum);
+ unsigned int hash = inet_lhashfn(net, params->hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
int score, hiscore = 0, matches = 0, reuseport = 0;
- bool exact_dif = inet_exact_dif_match(net, skb);
struct sock *sk, *result = NULL;
u32 phash = 0;
+ params->exact_dif = inet_exact_dif_match(net, skb);
+
sk_for_each_rcu(sk, &ilb->head) {
- score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
+ score = compute_score(sk, net, params);
if (score > hiscore) {
reuseport = sk->sk_reuseport;
if (reuseport) {
- phash = inet_ehashfn(net, daddr, hnum,
- saddr, sport);
+ phash = inet_ehashfn(net, params->daddr.ipv4,
+ params->hnum,
+ params->saddr.ipv4,
+ params->sport);
result = reuseport_select_sock(sk, phash,
skb, doff);
if (result)
@@ -265,11 +268,13 @@ void sock_edemux(struct sk_buff *skb)
EXPORT_SYMBOL(sock_edemux);
struct sock *__inet_lookup_established(struct net *net,
- struct inet_hashinfo *hashinfo,
- const __be32 saddr, const __be16 sport,
- const __be32 daddr, const u16 hnum,
- const int dif)
+ struct inet_hashinfo *hashinfo,
+ const struct sk_lookup *params)
{
+ const __be32 saddr = params->saddr.ipv4;
+ const __be32 daddr = params->daddr.ipv4;
+ const __be16 sport = params->sport;
+ const u16 hnum = params->hnum;
INET_ADDR_COOKIE(acookie, saddr, daddr);
const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
struct sock *sk;
@@ -285,12 +290,13 @@ struct sock *__inet_lookup_established(struct net *net,
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (likely(INET_MATCH(sk, net, acookie,
- saddr, daddr, ports, dif))) {
+ if (likely(INET_MATCH(sk, net, acookie, saddr, daddr,
+ ports, params->dif))) {
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
if (unlikely(!INET_MATCH(sk, net, acookie,
- saddr, daddr, ports, dif))) {
+ saddr, daddr, ports,
+ params->dif))) {
sock_gen_put(sk);
goto begin;
}
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
index 121767b36763..b0f9954712f9 100644
--- a/net/ipv4/netfilter/nf_socket_ipv4.c
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -86,14 +86,13 @@ nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
.daddr.ipv4 = daddr,
.sport = sport,
.dport = dport,
+ .hnum = ntohs(dport),
.dif = in->ifindex,
};
switch (protocol) {
case IPPROTO_TCP:
- return inet_lookup(net, &tcp_hashinfo, skb, doff,
- saddr, sport, daddr, dport,
- in->ifindex);
+ return inet_lookup(net, &tcp_hashinfo, skb, doff, &params);
case IPPROTO_UDP:
return udp4_lib_lookup(net, &params);
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a20e7f03d5f7..89a0d166e677 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -382,10 +382,16 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
u32 delta_us;
int err;
struct net *net = dev_net(icmp_skb->dev);
-
- sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
- th->dest, iph->saddr, ntohs(th->source),
- inet_iif(icmp_skb));
+ struct sk_lookup params = {
+ .daddr.ipv4 = iph->saddr,
+ .saddr.ipv4 = iph->daddr,
+ .sport = th->dest,
+ .dport = th->source,
+ .hnum = ntohs(th->source),
+ .dif = inet_iif(icmp_skb),
+ };
+
+ sk = inet_lookup_established(net, &tcp_hashinfo, &params);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
@@ -651,6 +657,14 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
&ip_hdr(skb)->saddr, AF_INET);
} else if (hash_location) {
+ struct sk_lookup params = {
+ .saddr.ipv4 = ip_hdr(skb)->saddr,
+ .daddr.ipv4 = ip_hdr(skb)->daddr,
+ .hnum = ntohs(th->source),
+ .sport = th->source,
+ .dif = inet_iif(skb),
+ };
+
/*
* active side is lost. Try to find listening socket through
* source port, and then find md5 key through listening socket.
@@ -658,10 +672,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
* Incoming packet is checked with md5 hash with finding key,
* no RST generated if md5 hash doesn't match.
*/
- sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
- ip_hdr(skb)->saddr,
- th->source, ip_hdr(skb)->daddr,
- ntohs(th->source), inet_iif(skb));
+ sk1 = inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
+ &params);
/* don't send rst if it can't find key */
if (!sk1)
goto out;
@@ -1509,6 +1521,10 @@ void tcp_v4_early_demux(struct sk_buff *skb)
const struct iphdr *iph;
const struct tcphdr *th;
struct sock *sk;
+ struct sk_lookup params = {
+ .dif = skb->skb_iif,
+ };
+
if (skb->pkt_type != PACKET_HOST)
return;
@@ -1522,10 +1538,13 @@ void tcp_v4_early_demux(struct sk_buff *skb)
if (th->doff < sizeof(struct tcphdr) / 4)
return;
- sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
- iph->saddr, th->source,
- iph->daddr, ntohs(th->dest),
- skb->skb_iif);
+ params.saddr.ipv4 = iph->saddr;
+ params.daddr.ipv4 = iph->daddr;
+ params.sport = th->source;
+ params.dport = th->dest;
+ params.hnum = ntohs(th->dest);
+
+ sk = inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, &params);
if (sk) {
skb->sk = sk;
skb->destructor = sock_edemux;
@@ -1645,6 +1664,7 @@ EXPORT_SYMBOL(tcp_filter);
int tcp_v4_rcv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
+ struct sk_lookup params = { };
const struct iphdr *iph;
const struct tcphdr *th;
bool refcounted;
@@ -1693,9 +1713,11 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;
+ params.sport = th->source;
+ params.dport = th->dest;
lookup:
- sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
- th->dest, &refcounted);
+ sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), &params,
+ &refcounted);
if (!sk)
goto no_tcp_socket;
@@ -1819,12 +1841,18 @@ int tcp_v4_rcv(struct sk_buff *skb)
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
+ struct sk_lookup params = {
+ .saddr.ipv4 = iph->saddr,
+ .daddr.ipv4 = iph->daddr,
+ .sport = th->source,
+ .dport = th->dest,
+ .hnum = ntohs(th->dest),
+ .dif = inet_iif(skb),
+ };
struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
&tcp_hashinfo, skb,
__tcp_hdrlen(th),
- iph->saddr, th->source,
- iph->daddr, th->dest,
- inet_iif(skb));
+ &params);
if (sk2) {
inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 5e0640877536..d7f6af42ebcc 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -47,6 +47,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
.daddr.ipv4 = req->id.idiag_dst[0],
.sport = req->id.idiag_sport,
.dport = req->id.idiag_dport,
+ .hnum = ntohs(req->id.idiag_dport),
.dif = req->id.idiag_if,
};
@@ -190,6 +191,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
.daddr.ipv4 = req->id.idiag_src[0],
.sport = req->id.idiag_dport,
.dport = req->id.idiag_sport,
+ .hnum = ntohs(req->id.idiag_sport),
.dif = req->id.idiag_if,
};
@@ -204,6 +206,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
.daddr.ipv4 = req->id.idiag_src[3],
.sport = req->id.idiag_dport,
.dport = req->id.idiag_sport,
+ .hnum = ntohs(req->id.idiag_sport),
.dif = req->id.idiag_if,
};
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 972a0e40c59a..5cce7eb7dea2 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -117,6 +117,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
.daddr.ipv4 = daddr,
.sport = sport,
.dport = dport,
+ .hnum = ntohs(dport),
.dif = in->ifindex,
};
@@ -129,11 +130,9 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
case NFT_LOOKUP_LISTENER:
tcph = hp;
sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
- ip_hdrlen(skb) +
+ ip_hdrlen(skb) +
__tcp_hdrlen(tcph),
- saddr, sport,
- daddr, dport,
- in->ifindex);
+ &params);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -145,8 +144,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
break;
case NFT_LOOKUP_ESTABLISHED:
sk = inet_lookup_established(net, &tcp_hashinfo,
- saddr, sport, daddr, dport,
- in->ifindex);
+ &params);
break;
default:
BUG();
--
2.1.4
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 04/10] net: ipv4: Convert raw sockets to sk_lookup
2017-07-25 15:38 [RFC PATCH 00/10] net: l3mdev: Support for sockets bound to enslaved device David Ahern
` (2 preceding siblings ...)
2017-07-25 15:38 ` [RFC PATCH 03/10] net: ipv4: Convert inet " David Ahern
@ 2017-07-25 15:38 ` David Ahern
2017-07-25 15:38 ` [RFC PATCH 05/10] net: ipv6: Convert udp socket lookups to new struct David Ahern
` (5 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: David Ahern @ 2017-07-25 15:38 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
Convert __raw_v4_lookup to use the new sk_lookup struct
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/raw.h | 3 +--
net/ipv4/raw.c | 72 ++++++++++++++++++++++++++++++++++-------------------
net/ipv4/raw_diag.c | 15 +++++++----
3 files changed, 58 insertions(+), 32 deletions(-)
diff --git a/include/net/raw.h b/include/net/raw.h
index 57c33dd22ec4..8d0f0e5d013b 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -25,8 +25,7 @@ extern struct proto raw_prot;
extern struct raw_hashinfo raw_v4_hashinfo;
struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
- unsigned short num, __be32 raddr,
- __be32 laddr, int dif);
+ const struct sk_lookup *params);
int raw_abort(struct sock *sk, int err);
void raw_icmp_error(struct sk_buff *, int, u32);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index b0bb5d0a30bd..4da5d87a61a5 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -122,15 +122,23 @@ void raw_unhash_sk(struct sock *sk)
EXPORT_SYMBOL_GPL(raw_unhash_sk);
struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
- unsigned short num, __be32 raddr, __be32 laddr, int dif)
+ const struct sk_lookup *params)
{
+ __be32 raddr = params->saddr.ipv4;
+ __be32 laddr = params->daddr.ipv4;
+
sk_for_each_from(sk) {
struct inet_sock *inet = inet_sk(sk);
+ bool dev_match;
+
+ dev_match = (!sk->sk_bound_dev_if ||
+ sk->sk_bound_dev_if == params->dif);
- if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
- !(inet->inet_daddr && inet->inet_daddr != raddr) &&
+ if (net_eq(sock_net(sk), net) &&
+ inet->inet_num == params->hnum &&
+ !(inet->inet_daddr && inet->inet_daddr != raddr) &&
!(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
- !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ dev_match)
goto found; /* gotcha */
}
sk = NULL;
@@ -169,23 +177,20 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
* RFC 1122: SHOULD pass TOS value up to the transport layer.
* -> It does. And not only TOS, but all IP header.
*/
-static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
+static int __raw_v4_input(struct sk_buff *skb, const struct iphdr *iph,
+ struct hlist_head *head)
{
- struct sock *sk;
- struct hlist_head *head;
+ struct net *net = dev_net(skb->dev);
+ const struct sk_lookup params = {
+ .saddr.ipv4 = iph->saddr,
+ .daddr.ipv4 = iph->daddr,
+ .hnum = iph->protocol,
+ .dif = skb->dev->ifindex,
+ };
int delivered = 0;
- struct net *net;
-
- read_lock(&raw_v4_hashinfo.lock);
- head = &raw_v4_hashinfo.ht[hash];
- if (hlist_empty(head))
- goto out;
-
- net = dev_net(skb->dev);
- sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
- iph->saddr, iph->daddr,
- skb->dev->ifindex);
+ struct sock *sk;
+ sk = __raw_v4_lookup(net, __sk_head(head), &params);
while (sk) {
delivered = 1;
if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) &&
@@ -197,11 +202,22 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
if (clone)
raw_rcv(sk, clone);
}
- sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
- iph->saddr, iph->daddr,
- skb->dev->ifindex);
+ sk = __raw_v4_lookup(net, sk_next(sk), &params);
}
-out:
+
+ return delivered;
+}
+
+static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
+{
+ struct hlist_head *head;
+ int delivered = 0;
+
+ read_lock(&raw_v4_hashinfo.lock);
+ head = &raw_v4_hashinfo.ht[hash];
+ if (!hlist_empty(head))
+ delivered = __raw_v4_input(skb, iph, head);
+
read_unlock(&raw_v4_hashinfo.lock);
return delivered;
}
@@ -297,12 +313,18 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
read_lock(&raw_v4_hashinfo.lock);
raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
if (raw_sk) {
+ struct sk_lookup params = {
+ .hnum = protocol,
+ .dif = skb->dev->ifindex,
+ };
+
iph = (const struct iphdr *)skb->data;
net = dev_net(skb->dev);
- while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
- iph->daddr, iph->saddr,
- skb->dev->ifindex)) != NULL) {
+ params.saddr.ipv4 = iph->daddr;
+ params.daddr.ipv4 = iph->saddr;
+ while ((raw_sk = __raw_v4_lookup(net, raw_sk,
+ &params)) != NULL) {
raw_err(raw_sk, skb, info);
raw_sk = sk_next(raw_sk);
iph = (const struct iphdr *)skb->data;
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index e1a51ca68d23..a708de070cc6 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -42,11 +42,16 @@ static struct sock *raw_lookup(struct net *net, struct sock *from,
struct inet_diag_req_raw *r = (void *)req;
struct sock *sk = NULL;
- if (r->sdiag_family == AF_INET)
- sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol,
- r->id.idiag_dst[0],
- r->id.idiag_src[0],
- r->id.idiag_if);
+ if (r->sdiag_family == AF_INET) {
+ const struct sk_lookup params = {
+ .saddr.ipv4 = r->id.idiag_dst[0],
+ .daddr.ipv4 = r->id.idiag_src[0],
+ .hnum = r->sdiag_raw_protocol,
+ .dif = r->id.idiag_if,
+ };
+
+ sk = __raw_v4_lookup(net, from, &params);
+ }
#if IS_ENABLED(CONFIG_IPV6)
else
sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol,
--
2.1.4
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 05/10] net: ipv6: Convert udp socket lookups to new struct
2017-07-25 15:38 [RFC PATCH 00/10] net: l3mdev: Support for sockets bound to enslaved device David Ahern
` (3 preceding siblings ...)
2017-07-25 15:38 ` [RFC PATCH 04/10] net: ipv4: Convert raw sockets to sk_lookup David Ahern
@ 2017-07-25 15:38 ` David Ahern
2017-07-25 15:38 ` [RFC PATCH 06/10] net: ipv6: Convert inet " David Ahern
` (4 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: David Ahern @ 2017-07-25 15:38 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
Convert udp6_lib_lookup and __udp6_lib_lookup to use the new sk_lookup
struct.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/udp.h | 12 +--
net/ipv4/udp_diag.c | 33 ++++---
net/ipv6/netfilter/nf_socket_ipv6.c | 11 ++-
net/ipv6/udp.c | 177 +++++++++++++++++++-----------------
net/netfilter/xt_TPROXY.c | 10 +-
5 files changed, 135 insertions(+), 108 deletions(-)
diff --git a/include/net/udp.h b/include/net/udp.h
index 5e0ff095dc6d..c5a75e9422c6 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -288,15 +288,9 @@ struct sock *__udp4_lib_lookup(struct net *net, struct sk_lookup *params,
struct udp_table *tbl, struct sk_buff *skb);
struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport);
-struct sock *udp6_lib_lookup(struct net *net,
- const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr, __be16 dport,
- int dif);
-struct sock *__udp6_lib_lookup(struct net *net,
- const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr, __be16 dport,
- int dif, struct udp_table *tbl,
- struct sk_buff *skb);
+struct sock *udp6_lib_lookup(struct net *net, struct sk_lookup *params);
+struct sock *__udp6_lib_lookup(struct net *net, struct sk_lookup *params,
+ struct udp_table *tbl, struct sk_buff *skb);
struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport);
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index d7f6af42ebcc..8c1221f5f2dd 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -54,13 +54,17 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
sk = __udp4_lib_lookup(net, &params, tbl, NULL);
}
#if IS_ENABLED(CONFIG_IPV6)
- else if (req->sdiag_family == AF_INET6)
- sk = __udp6_lib_lookup(net,
- (struct in6_addr *)req->id.idiag_src,
- req->id.idiag_sport,
- (struct in6_addr *)req->id.idiag_dst,
- req->id.idiag_dport,
- req->id.idiag_if, tbl, NULL);
+ else if (req->sdiag_family == AF_INET6) {
+ struct sk_lookup params = {
+ .saddr.ipv6 = (struct in6_addr *)req->id.idiag_src,
+ .daddr.ipv6 = (struct in6_addr *)req->id.idiag_dst,
+ .sport = req->id.idiag_sport,
+ .dport = req->id.idiag_dport,
+ .dif = req->id.idiag_if,
+ };
+
+ sk = __udp6_lib_lookup(net, &params, tbl, NULL);
+ }
#endif
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -212,12 +216,15 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
sk = __udp4_lib_lookup(net, &params, tbl, NULL);
} else {
- sk = __udp6_lib_lookup(net,
- (struct in6_addr *)req->id.idiag_dst,
- req->id.idiag_dport,
- (struct in6_addr *)req->id.idiag_src,
- req->id.idiag_sport,
- req->id.idiag_if, tbl, NULL);
+ struct sk_lookup params = {
+ .saddr.ipv6 = (struct in6_addr *)req->id.idiag_dst,
+ .daddr.ipv6 = (struct in6_addr *)req->id.idiag_src,
+ .sport = req->id.idiag_dport,
+ .dport = req->id.idiag_sport,
+ .dif = req->id.idiag_if,
+ };
+
+ sk = __udp6_lib_lookup(net, &params, tbl, NULL);
}
}
#endif
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index ebb2bf84232a..46e45b81094f 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -86,14 +86,21 @@ nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
const __be16 sport, const __be16 dport,
const struct net_device *in)
{
+ struct sk_lookup params = {
+ .saddr.ipv6 = saddr,
+ .daddr.ipv6 = daddr,
+ .sport = sport,
+ .dport = dport,
+ .dif = in->ifindex,
+ };
+
switch (protocol) {
case IPPROTO_TCP:
return inet6_lookup(net, &tcp_hashinfo, skb, doff,
saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
- return udp6_lib_lookup(net, saddr, sport, daddr, dport,
- in->ifindex);
+ return udp6_lib_lookup(net, &params);
}
return NULL;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4a3e65626e8b..5c4fdbe52c24 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -67,13 +67,14 @@ static bool udp6_lib_exact_dif_match(struct net *net, struct sk_buff *skb)
}
static u32 udp6_ehashfn(const struct net *net,
- const struct in6_addr *laddr,
- const u16 lport,
- const struct in6_addr *faddr,
- const __be16 fport)
+ const struct sk_lookup *params)
{
+ const struct in6_addr *laddr = params->daddr.ipv6;
+ const struct in6_addr *faddr = params->saddr.ipv6;
static u32 udp6_ehash_secret __read_mostly;
static u32 udp_ipv6_hash_secret __read_mostly;
+ const __be16 fport = params->sport;
+ const u16 lport = params->hnum;
u32 lhash, fhash;
@@ -127,15 +128,13 @@ static void udp_v6_rehash(struct sock *sk)
}
static int compute_score(struct sock *sk, struct net *net,
- const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr, unsigned short hnum,
- int dif, bool exact_dif)
+ const struct sk_lookup *params)
{
- int score;
struct inet_sock *inet;
+ int score, rc;
if (!net_eq(sock_net(sk), net) ||
- udp_sk(sk)->udp_port_hash != hnum ||
+ udp_sk(sk)->udp_port_hash != params->hnum ||
sk->sk_family != PF_INET6)
return -1;
@@ -143,28 +142,28 @@ static int compute_score(struct sock *sk, struct net *net,
inet = inet_sk(sk);
if (inet->inet_dport) {
- if (inet->inet_dport != sport)
+ if (inet->inet_dport != params->sport)
return -1;
score++;
}
if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, params->daddr.ipv6))
return -1;
score++;
}
if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
+ if (!ipv6_addr_equal(&sk->sk_v6_daddr, params->saddr.ipv6))
return -1;
score++;
}
- if (sk->sk_bound_dev_if || exact_dif) {
- if (sk->sk_bound_dev_if != dif)
- return -1;
+ rc = sk_lookup_device_cmp(sk, params);
+ if (rc < 0)
+ return -1;
+ if (rc > 0)
score++;
- }
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
@@ -174,10 +173,9 @@ static int compute_score(struct sock *sk, struct net *net,
/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(struct net *net,
- const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr, unsigned int hnum, int dif,
- bool exact_dif, struct udp_hslot *hslot2,
- struct sk_buff *skb)
+ const struct sk_lookup *params,
+ struct udp_hslot *hslot2,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
int score, badness, matches = 0, reuseport = 0;
@@ -186,13 +184,11 @@ static struct sock *udp6_lib_lookup2(struct net *net,
result = NULL;
badness = -1;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
- score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, exact_dif);
+ score = compute_score(sk, net, params);
if (score > badness) {
reuseport = sk->sk_reuseport;
if (reuseport) {
- hash = udp6_ehashfn(net, daddr, hnum,
- saddr, sport);
+ hash = udp6_ehashfn(net, params);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
@@ -213,30 +209,27 @@ static struct sock *udp6_lib_lookup2(struct net *net,
}
/* rcu_read_lock() must be held */
-struct sock *__udp6_lib_lookup(struct net *net,
- const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr, __be16 dport,
- int dif, struct udp_table *udptable,
- struct sk_buff *skb)
+struct sock *__udp6_lib_lookup(struct net *net, struct sk_lookup *params,
+ struct udp_table *udptable, struct sk_buff *skb)
{
struct sock *sk, *result;
- unsigned short hnum = ntohs(dport);
+ unsigned short hnum = ntohs(params->dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
- bool exact_dif = udp6_lib_exact_dif_match(net, skb);
int score, badness, matches = 0, reuseport = 0;
u32 hash = 0;
+ params->hnum = hnum;
+ params->exact_dif = udp6_lib_exact_dif_match(net, skb);
+
if (hslot->count > 10) {
- hash2 = udp6_portaddr_hash(net, daddr, hnum);
+ hash2 = udp6_portaddr_hash(net, params->daddr.ipv6, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
if (hslot->count < hslot2->count)
goto begin;
- result = udp6_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif, exact_dif,
- hslot2, skb);
+ result = udp6_lib_lookup2(net, params, hslot2, skb);
if (!result) {
unsigned int old_slot2 = slot2;
hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
@@ -249,10 +242,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
if (hslot->count < hslot2->count)
goto begin;
- result = udp6_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif,
- exact_dif, hslot2,
- skb);
+ result = udp6_lib_lookup2(net, params, hslot2, skb);
}
return result;
}
@@ -260,13 +250,11 @@ struct sock *__udp6_lib_lookup(struct net *net,
result = NULL;
badness = -1;
sk_for_each_rcu(sk, &hslot->head) {
- score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
- exact_dif);
+ score = compute_score(sk, net, params);
if (score > badness) {
reuseport = sk->sk_reuseport;
if (reuseport) {
- hash = udp6_ehashfn(net, daddr, hnum,
- saddr, sport);
+ hash = udp6_ehashfn(net, params);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
if (result)
@@ -292,23 +280,34 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct sock *sk;
+ struct sk_lookup params = {
+ .saddr.ipv6 = &iph->saddr,
+ .daddr.ipv6 = &iph->daddr,
+ .sport = sport,
+ .dport = dport,
+ .dif = inet6_iif(skb),
+ };
sk = skb_steal_sock(skb);
if (unlikely(sk))
return sk;
- return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
- &iph->daddr, dport, inet6_iif(skb),
- udptable, skb);
+
+ return __udp6_lib_lookup(dev_net(skb->dev), &params, udptable, skb);
}
struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
-
- return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
- &iph->daddr, dport, inet6_iif(skb),
- &udp_table, skb);
+ struct sk_lookup params = {
+ .saddr.ipv6 = &iph->saddr,
+ .daddr.ipv6 = &iph->daddr,
+ .sport = sport,
+ .dport = dport,
+ .dif = inet6_iif(skb),
+ };
+
+ return __udp6_lib_lookup(dev_net(skb->dev), &params, &udp_table, skb);
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
@@ -318,13 +317,11 @@ EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) || \
IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
-struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr, __be16 dport, int dif)
+struct sock *udp6_lib_lookup(struct net *net, struct sk_lookup *params)
{
struct sock *sk;
- sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport,
- dif, &udp_table, NULL);
+ sk = __udp6_lib_lookup(net, params, &udp_table, NULL);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
@@ -487,16 +484,20 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
{
struct ipv6_pinfo *np;
const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
- const struct in6_addr *saddr = &hdr->saddr;
- const struct in6_addr *daddr = &hdr->daddr;
struct udphdr *uh = (struct udphdr *)(skb->data+offset);
+ struct sk_lookup params = {
+ .saddr.ipv6 = &hdr->daddr,
+ .daddr.ipv6 = &hdr->saddr,
+ .sport = uh->dest,
+ .dport = uh->source,
+ .dif = inet6_iif(skb),
+ };
struct sock *sk;
int harderr;
int err;
struct net *net = dev_net(skb->dev);
- sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
- inet6_iif(skb), udptable, skb);
+ sk = __udp6_lib_lookup(net, &params, udptable, skb);
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
@@ -658,21 +659,21 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
- __be16 loc_port, const struct in6_addr *loc_addr,
- __be16 rmt_port, const struct in6_addr *rmt_addr,
- int dif, unsigned short hnum)
+ struct sk_lookup *params)
{
+ const struct in6_addr *loc_addr = params->daddr.ipv6;
+ const struct in6_addr *rmt_addr = params->saddr.ipv6;
struct inet_sock *inet = inet_sk(sk);
if (!net_eq(sock_net(sk), net))
return false;
- if (udp_sk(sk)->udp_port_hash != hnum ||
+ if (udp_sk(sk)->udp_port_hash != params->hnum ||
sk->sk_family != PF_INET6 ||
- (inet->inet_dport && inet->inet_dport != rmt_port) ||
+ (inet->inet_dport && inet->inet_dport != params->sport) ||
(!ipv6_addr_any(&sk->sk_v6_daddr) &&
!ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
- (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) ||
+ (sk->sk_bound_dev_if && sk->sk_bound_dev_if != params->dif) ||
(!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)))
return false;
@@ -705,9 +706,16 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
unsigned int offset = offsetof(typeof(*sk), sk_node);
unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
- int dif = inet6_iif(skb);
struct hlist_node *node;
struct sk_buff *nskb;
+ struct sk_lookup params = {
+ .saddr.ipv6 = saddr,
+ .daddr.ipv6 = daddr,
+ .sport = uh->source,
+ .dport = uh->dest,
+ .hnum = hnum,
+ .dif = inet6_iif(skb),
+ };
if (use_hash2) {
hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
@@ -719,8 +727,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
}
sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
- if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr,
- uh->source, saddr, dif, hnum))
+ if (!__udp_v6_is_mcast_sock(net, sk, &params))
continue;
/* If zero checksum and no_check is not on for
* the socket then skip it.
@@ -873,21 +880,22 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
static struct sock *__udp6_lib_demux_lookup(struct net *net,
- __be16 loc_port, const struct in6_addr *loc_addr,
- __be16 rmt_port, const struct in6_addr *rmt_addr,
- int dif)
+ const struct sk_lookup *params)
{
- unsigned short hnum = ntohs(loc_port);
- unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
+ unsigned short hnum = params->hnum;
+ unsigned int hash2 = udp6_portaddr_hash(net, params->daddr.ipv6, hnum);
unsigned int slot2 = hash2 & udp_table.mask;
struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
- const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
+ const __portpair ports = INET_COMBINED_PORTS(params->sport, hnum);
struct sock *sk;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
if (sk->sk_state == TCP_ESTABLISHED &&
- INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
+ INET6_MATCH(sk, net, params->saddr.ipv6,
+ params->daddr.ipv6, ports,
+ params->dif))
return sk;
+
/* Only check first socket in chain */
break;
}
@@ -900,7 +908,12 @@ static void udp_v6_early_demux(struct sk_buff *skb)
const struct udphdr *uh;
struct sock *sk;
struct dst_entry *dst;
- int dif = skb->dev->ifindex;
+ struct sk_lookup params = {
+ .dif = skb->dev->ifindex,
+ };
+
+ if (skb->pkt_type != PACKET_HOST)
+ return;
if (!pskb_may_pull(skb, skb_transport_offset(skb) +
sizeof(struct udphdr)))
@@ -908,13 +921,13 @@ static void udp_v6_early_demux(struct sk_buff *skb)
uh = udp_hdr(skb);
- if (skb->pkt_type == PACKET_HOST)
- sk = __udp6_lib_demux_lookup(net, uh->dest,
- &ipv6_hdr(skb)->daddr,
- uh->source, &ipv6_hdr(skb)->saddr,
- dif);
- else
- return;
+ params.daddr.ipv6 = &ipv6_hdr(skb)->daddr;
+ params.dport = uh->dest;
+ params.hnum = ntohs(uh->dest);
+ params.saddr.ipv6 = &ipv6_hdr(skb)->saddr;
+ params.sport = uh->source;
+
+ sk = __udp6_lib_demux_lookup(net, &params);
if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
return;
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 5cce7eb7dea2..25843f741c0b 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -188,6 +188,13 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
+ struct sk_lookup params = {
+ .saddr.ipv6 = saddr,
+ .daddr.ipv6 = daddr,
+ .sport = sport,
+ .dport = dport,
+ .dif = in->ifindex,
+ };
struct sock *sk;
struct tcphdr *tcph;
@@ -220,8 +227,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
}
break;
case IPPROTO_UDP:
- sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
- in->ifindex);
+ sk = udp6_lib_lookup(net, &params);
if (sk) {
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr);
--
2.1.4
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 06/10] net: ipv6: Convert inet socket lookups to new struct
2017-07-25 15:38 [RFC PATCH 00/10] net: l3mdev: Support for sockets bound to enslaved device David Ahern
` (4 preceding siblings ...)
2017-07-25 15:38 ` [RFC PATCH 05/10] net: ipv6: Convert udp socket lookups to new struct David Ahern
@ 2017-07-25 15:38 ` David Ahern
2017-07-25 15:38 ` [RFC PATCH 07/10] net: ipv6: Convert raw sockets to sk_lookup David Ahern
` (3 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: David Ahern @ 2017-07-25 15:38 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
Convert the various inet6_lookup functions to use the new sk_lookup
struct.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/inet6_hashtables.h | 39 +++++++-------------
net/dccp/ipv6.c | 22 ++++++++----
net/ipv4/inet_diag.c | 19 ++++++----
net/ipv4/udp_diag.c | 2 ++
net/ipv6/inet6_hashtables.c | 72 +++++++++++++++++++------------------
net/ipv6/netfilter/nf_socket_ipv6.c | 5 ++-
net/ipv6/tcp_ipv6.c | 60 +++++++++++++++++++++----------
net/netfilter/xt_TPROXY.c | 8 ++---
8 files changed, 125 insertions(+), 102 deletions(-)
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index b87becacd9d3..15db41272ff2 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -46,63 +46,50 @@ static inline unsigned int __inet6_ehashfn(const u32 lhash,
*/
struct sock *__inet6_lookup_established(struct net *net,
struct inet_hashinfo *hashinfo,
- const struct in6_addr *saddr,
- const __be16 sport,
- const struct in6_addr *daddr,
- const u16 hnum, const int dif);
+ const struct sk_lookup *params);
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
- const struct in6_addr *saddr,
- const __be16 sport,
- const struct in6_addr *daddr,
- const unsigned short hnum, const int dif);
+ struct sk_lookup *params);
static inline struct sock *__inet6_lookup(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
- const struct in6_addr *saddr,
- const __be16 sport,
- const struct in6_addr *daddr,
- const u16 hnum,
- const int dif,
+ struct sk_lookup *params,
bool *refcounted)
{
- struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
- sport, daddr, hnum, dif);
+ struct sock *sk = __inet6_lookup_established(net, hashinfo, params);
+
*refcounted = true;
if (sk)
return sk;
*refcounted = false;
- return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
- daddr, hnum, dif);
+ return inet6_lookup_listener(net, hashinfo, skb, doff, params);
}
static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
- const __be16 sport,
- const __be16 dport,
- int iif,
+ struct sk_lookup *params,
bool *refcounted)
{
struct sock *sk = skb_steal_sock(skb);
+ params->saddr.ipv6 = &ipv6_hdr(skb)->saddr;
+ params->daddr.ipv6 = &ipv6_hdr(skb)->daddr;
+ params->hnum = ntohs(params->dport);
+
*refcounted = true;
if (sk)
return sk;
return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
- doff, &ipv6_hdr(skb)->saddr, sport,
- &ipv6_hdr(skb)->daddr, ntohs(dport),
- iif, refcounted);
+ doff, params, refcounted);
}
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
- const struct in6_addr *saddr, const __be16 sport,
- const struct in6_addr *daddr, const __be16 dport,
- const int dif);
+ struct sk_lookup *params);
int inet6_hash(struct sock *sk);
#endif /* IS_ENABLED(CONFIG_IPV6) */
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index c376af5bfdfb..e92f10a832dd 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -70,6 +70,11 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
+ struct sk_lookup params = {
+ .saddr.ipv6 = &hdr->daddr,
+ .daddr.ipv6 = &hdr->saddr,
+ .dif = inet6_iif(skb),
+ };
const struct dccp_hdr *dh;
struct dccp_sock *dp;
struct ipv6_pinfo *np;
@@ -86,11 +91,10 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
dh = (struct dccp_hdr *)(skb->data + offset);
- sk = __inet6_lookup_established(net, &dccp_hashinfo,
- &hdr->daddr, dh->dccph_dport,
- &hdr->saddr, ntohs(dh->dccph_sport),
- inet6_iif(skb));
-
+ params.sport = dh->dccph_dport;
+ params.dport = dh->dccph_sport;
+ params.hnum = ntohs(dh->dccph_sport);
+ sk = __inet6_lookup_established(net, &dccp_hashinfo, &params);
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
@@ -656,6 +660,9 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
static int dccp_v6_rcv(struct sk_buff *skb)
{
+ struct sk_lookup params = {
+ .dif = inet6_iif(skb),
+ };
const struct dccp_hdr *dh;
bool refcounted;
struct sock *sk;
@@ -683,10 +690,11 @@ static int dccp_v6_rcv(struct sk_buff *skb)
else
DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
+ params.sport = dh->dccph_sport;
+ params.dport = dh->dccph_dport;
lookup:
sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
- dh->dccph_sport, dh->dccph_dport,
- inet6_iif(skb), &refcounted);
+ &params, &refcounted);
if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
"get corresponding socket\n");
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 6c3bc4e408d0..fa0d8531ce36 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -422,13 +422,18 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
};
sk = inet_lookup(net, hashinfo, NULL, 0, &params);
- } else
- sk = inet6_lookup(net, hashinfo, NULL, 0,
- (struct in6_addr *)req->id.idiag_dst,
- req->id.idiag_dport,
- (struct in6_addr *)req->id.idiag_src,
- req->id.idiag_sport,
- req->id.idiag_if);
+ } else {
+ struct sk_lookup params = {
+ .saddr.ipv6 = (struct in6_addr *)req->id.idiag_dst,
+ .daddr.ipv6 = (struct in6_addr *)req->id.idiag_src,
+ .sport = req->id.idiag_dport,
+ .dport = req->id.idiag_sport,
+ .hnum = ntohs(req->id.idiag_sport),
+ .dif = req->id.idiag_if,
+ };
+
+ sk = inet6_lookup(net, hashinfo, NULL, 0, &params);
+ }
}
#endif
else {
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 8c1221f5f2dd..a11be7b8b55d 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -60,6 +60,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
.daddr.ipv6 = (struct in6_addr *)req->id.idiag_dst,
.sport = req->id.idiag_sport,
.dport = req->id.idiag_dport,
+ .hnum = ntohs(req->id.idiag_dport),
.dif = req->id.idiag_if,
};
@@ -221,6 +222,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
.daddr.ipv6 = (struct in6_addr *)req->id.idiag_src,
.sport = req->id.idiag_dport,
.dport = req->id.idiag_sport,
+ .hnum = ntohs(req->id.idiag_sport),
.dif = req->id.idiag_if,
};
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b13b8f93079d..878c03094f2e 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -52,33 +52,35 @@ u32 inet6_ehashfn(const struct net *net,
*/
struct sock *__inet6_lookup_established(struct net *net,
struct inet_hashinfo *hashinfo,
- const struct in6_addr *saddr,
- const __be16 sport,
- const struct in6_addr *daddr,
- const u16 hnum,
- const int dif)
+ const struct sk_lookup *params)
{
+ const __portpair ports = INET_COMBINED_PORTS(params->sport,
+ params->hnum);
+ const struct in6_addr *saddr = params->saddr.ipv6;
+ const struct in6_addr *daddr = params->daddr.ipv6;
struct sock *sk;
const struct hlist_nulls_node *node;
- const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
+
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
- unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
+ unsigned int hash = inet6_ehashfn(net, daddr, params->hnum,
+ saddr, params->sport);
unsigned int slot = hash & hashinfo->ehash_mask;
struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
-
begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
+ if (!INET6_MATCH(sk, net, saddr, daddr, ports,
+ params->dif))
continue;
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
- if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
+ if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports,
+ params->dif))) {
sock_gen_put(sk);
goto begin;
}
@@ -94,26 +96,27 @@ struct sock *__inet6_lookup_established(struct net *net,
EXPORT_SYMBOL(__inet6_lookup_established);
static inline int compute_score(struct sock *sk, struct net *net,
- const unsigned short hnum,
- const struct in6_addr *daddr,
- const int dif, bool exact_dif)
+ const struct sk_lookup *params)
{
int score = -1;
- if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
+ if (net_eq(sock_net(sk), net) &&
+ inet_sk(sk)->inet_num == params->hnum &&
sk->sk_family == PF_INET6) {
+ int rc;
score = 1;
if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr,
+ params->daddr.ipv6))
return -1;
score++;
}
- if (sk->sk_bound_dev_if || exact_dif) {
- if (sk->sk_bound_dev_if != dif)
- return -1;
+ rc = sk_lookup_device_cmp(sk, params);
+ if (rc < 0)
+ return -1;
+ if (rc > 0)
score++;
- }
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
}
@@ -122,26 +125,27 @@ static inline int compute_score(struct sock *sk, struct net *net,
/* called with rcu_read_lock() */
struct sock *inet6_lookup_listener(struct net *net,
- struct inet_hashinfo *hashinfo,
- struct sk_buff *skb, int doff,
- const struct in6_addr *saddr,
- const __be16 sport, const struct in6_addr *daddr,
- const unsigned short hnum, const int dif)
+ struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
+ struct sk_lookup *params)
{
- unsigned int hash = inet_lhashfn(net, hnum);
+ unsigned int hash = inet_lhashfn(net, params->hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
int score, hiscore = 0, matches = 0, reuseport = 0;
- bool exact_dif = inet6_exact_dif_match(net, skb);
struct sock *sk, *result = NULL;
u32 phash = 0;
+ params->exact_dif = inet6_exact_dif_match(net, skb);
+
sk_for_each(sk, &ilb->head) {
- score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
+ score = compute_score(sk, net, params);
if (score > hiscore) {
reuseport = sk->sk_reuseport;
if (reuseport) {
- phash = inet6_ehashfn(net, daddr, hnum,
- saddr, sport);
+ phash = inet6_ehashfn(net, params->daddr.ipv6,
+ params->hnum,
+ params->saddr.ipv6,
+ params->sport);
result = reuseport_select_sock(sk, phash,
skb, doff);
if (result)
@@ -163,15 +167,12 @@ EXPORT_SYMBOL_GPL(inet6_lookup_listener);
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
- const struct in6_addr *saddr, const __be16 sport,
- const struct in6_addr *daddr, const __be16 dport,
- const int dif)
+ struct sk_lookup *params)
{
struct sock *sk;
bool refcounted;
- sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
- ntohs(dport), dif, &refcounted);
+ sk = __inet6_lookup(net, hashinfo, skb, doff, params, &refcounted);
if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
@@ -203,7 +204,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
if (sk2->sk_hash != hash)
continue;
- if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) {
+ if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports,
+ dif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
if (twsk_unique(sk, sk2, twp))
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index 46e45b81094f..2918c9062e1a 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -91,14 +91,13 @@ nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
.daddr.ipv6 = daddr,
.sport = sport,
.dport = dport,
+ .hnum = ntohs(dport),
.dif = in->ifindex,
};
switch (protocol) {
case IPPROTO_TCP:
- return inet6_lookup(net, &tcp_hashinfo, skb, doff,
- saddr, sport, daddr, dport,
- in->ifindex);
+ return inet6_lookup(net, &tcp_hashinfo, skb, doff, &params);
case IPPROTO_UDP:
return udp6_lib_lookup(net, &params);
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2521690d62d6..154886daba7b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -45,6 +45,7 @@
#include <linux/random.h>
#include <net/tcp.h>
+#include <net/inet_hashtables.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
@@ -338,6 +339,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
{
const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
+ struct sk_lookup params = {
+ .saddr.ipv6 = &hdr->daddr,
+ .daddr.ipv6 = &hdr->saddr,
+ .sport = th->dest,
+ .hnum = ntohs(th->source),
+ .dif = skb->dev->ifindex,
+ };
struct net *net = dev_net(skb->dev);
struct request_sock *fastopen;
struct ipv6_pinfo *np;
@@ -347,11 +355,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
bool fatal;
int err;
- sk = __inet6_lookup_established(net, &tcp_hashinfo,
- &hdr->daddr, th->dest,
- &hdr->saddr, ntohs(th->source),
- skb->dev->ifindex);
-
+ sk = __inet6_lookup_established(net, &tcp_hashinfo, &params);
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
@@ -907,6 +911,14 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk && sk_fullsock(sk)) {
key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
} else if (hash_location) {
+ struct sk_lookup params = {
+ .saddr.ipv6 = &ipv6h->saddr,
+ .daddr.ipv6 = &ipv6h->daddr,
+ .sport = th->source,
+ .hnum = ntohs(th->source),
+ .dif = tcp_v6_iif(skb),
+ };
+
/*
* active side is lost. Try to find listening socket through
* source port, and then find md5 key through listening socket.
@@ -915,10 +927,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
* no RST generated if md5 hash doesn't match.
*/
sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, NULL, 0,
- &ipv6h->saddr,
- th->source, &ipv6h->daddr,
- ntohs(th->source), tcp_v6_iif(skb));
+ &tcp_hashinfo, NULL, 0, &params);
if (!sk1)
goto out;
@@ -1403,6 +1412,9 @@ static int tcp_v6_rcv(struct sk_buff *skb)
struct sock *sk;
int ret;
struct net *net = dev_net(skb->dev);
+ struct sk_lookup params = {
+ .dif = inet6_iif(skb),
+ };
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
@@ -1428,10 +1440,11 @@ static int tcp_v6_rcv(struct sk_buff *skb)
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
+ params.sport = th->source;
+ params.dport = th->dest;
lookup:
sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
- th->source, th->dest, inet6_iif(skb),
- &refcounted);
+ &params, &refcounted);
if (!sk)
goto no_tcp_socket;
@@ -1558,13 +1571,17 @@ static int tcp_v6_rcv(struct sk_buff *skb)
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN:
{
+ struct sk_lookup params = {
+ .saddr.ipv6 = &ipv6_hdr(skb)->saddr,
+ .daddr.ipv6 = &ipv6_hdr(skb)->daddr,
+ .sport = th->source,
+ .hnum = ntohs(th->dest),
+ .dif = tcp_v6_iif(skb),
+ };
struct sock *sk2;
sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
- skb, __tcp_hdrlen(th),
- &ipv6_hdr(skb)->saddr, th->source,
- &ipv6_hdr(skb)->daddr,
- ntohs(th->dest), tcp_v6_iif(skb));
+ skb, __tcp_hdrlen(th), &params);
if (sk2) {
struct inet_timewait_sock *tw = inet_twsk(sk);
inet_twsk_deschedule_put(tw);
@@ -1591,6 +1608,10 @@ static int tcp_v6_rcv(struct sk_buff *skb)
static void tcp_v6_early_demux(struct sk_buff *skb)
{
+ /* Note : We use inet6_iif() here, not tcp_v6_iif() */
+ struct sk_lookup params = {
+ .dif = inet6_iif(skb),
+ };
const struct ipv6hdr *hdr;
const struct tcphdr *th;
struct sock *sk;
@@ -1607,11 +1628,12 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
if (th->doff < sizeof(struct tcphdr) / 4)
return;
- /* Note : We use inet6_iif() here, not tcp_v6_iif() */
+ params.saddr.ipv6 = &hdr->saddr;
+ params.daddr.ipv6 = &hdr->daddr;
+ params.sport = th->source;
+ params.hnum = ntohs(th->dest); /* dest port in host byte order */
sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
- &hdr->saddr, th->source,
- &hdr->daddr, ntohs(th->dest),
- inet6_iif(skb));
+ &params);
if (sk) {
skb->sk = sk;
skb->destructor = sock_edemux;
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 25843f741c0b..c031385369c4 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -193,6 +193,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
.daddr.ipv6 = daddr,
.sport = sport,
.dport = dport,
+ .hnum = ntohs(dport),
.dif = in->ifindex,
};
struct sock *sk;
@@ -205,9 +206,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
tcph = hp;
sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
thoff + __tcp_hdrlen(tcph),
- saddr, sport,
- daddr, ntohs(dport),
- in->ifindex);
+ &params);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -219,8 +218,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
break;
case NFT_LOOKUP_ESTABLISHED:
sk = __inet6_lookup_established(net, &tcp_hashinfo,
- saddr, sport, daddr, ntohs(dport),
- in->ifindex);
+ &params);
break;
default:
BUG();
--
2.1.4
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 07/10] net: ipv6: Convert raw sockets to sk_lookup
2017-07-25 15:38 [RFC PATCH 00/10] net: l3mdev: Support for sockets bound to enslaved device David Ahern
` (5 preceding siblings ...)
2017-07-25 15:38 ` [RFC PATCH 06/10] net: ipv6: Convert inet " David Ahern
@ 2017-07-25 15:38 ` David Ahern
2017-07-25 15:38 ` [RFC PATCH 08/10] net: Add sdif " David Ahern
` (2 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: David Ahern @ 2017-07-25 15:38 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
Convert __raw_v6_lookup to use the new sk_lookup struct.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/rawv6.h | 3 +--
net/ipv4/raw_diag.c | 15 ++++++++++-----
net/ipv6/raw.c | 41 +++++++++++++++++++++++------------------
3 files changed, 34 insertions(+), 25 deletions(-)
diff --git a/include/net/rawv6.h b/include/net/rawv6.h
index cbe4e9de1894..406268324d26 100644
--- a/include/net/rawv6.h
+++ b/include/net/rawv6.h
@@ -5,8 +5,7 @@
extern struct raw_hashinfo raw_v6_hashinfo;
struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
- unsigned short num, const struct in6_addr *loc_addr,
- const struct in6_addr *rmt_addr, int dif);
+ const struct sk_lookup *params);
int raw_abort(struct sock *sk, int err);
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index a708de070cc6..2314993f6294 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -53,11 +53,16 @@ static struct sock *raw_lookup(struct net *net, struct sock *from,
sk = __raw_v4_lookup(net, from, &params);
}
#if IS_ENABLED(CONFIG_IPV6)
- else
- sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol,
- (const struct in6_addr *)r->id.idiag_src,
- (const struct in6_addr *)r->id.idiag_dst,
- r->id.idiag_if);
+ else {
+ struct sk_lookup params = {
+ .saddr.ipv6 = (const struct in6_addr *)r->id.idiag_dst,
+ .daddr.ipv6 = (const struct in6_addr *)r->id.idiag_src,
+ .hnum = r->sdiag_raw_protocol,
+ .dif = r->id.idiag_if,
+ };
+
+ sk = __raw_v6_lookup(net, from, &params);
+ }
#endif
return sk;
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 60be012fe708..51e651f18ffb 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -71,14 +71,14 @@ struct raw_hashinfo raw_v6_hashinfo = {
EXPORT_SYMBOL_GPL(raw_v6_hashinfo);
struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
- unsigned short num, const struct in6_addr *loc_addr,
- const struct in6_addr *rmt_addr, int dif)
+ const struct sk_lookup *params)
{
+ const struct in6_addr *loc_addr = params->daddr.ipv6;
+ const struct in6_addr *rmt_addr = params->saddr.ipv6;
bool is_multicast = ipv6_addr_is_multicast(loc_addr);
sk_for_each_from(sk)
- if (inet_sk(sk)->inet_num == num) {
-
+ if (inet_sk(sk)->inet_num == params->hnum) {
if (!net_eq(sock_net(sk), net))
continue;
@@ -86,7 +86,8 @@ struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
!ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
continue;
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
+ if (sk->sk_bound_dev_if &&
+ sk->sk_bound_dev_if != params->dif)
continue;
if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
@@ -159,15 +160,17 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister);
*/
static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
{
- const struct in6_addr *saddr;
- const struct in6_addr *daddr;
+ struct sk_lookup params = {
+ .saddr.ipv6 = &ipv6_hdr(skb)->saddr,
+ .daddr.ipv6 = &ipv6_hdr(skb)->daddr,
+ .hnum = nexthdr,
+ .dif = inet6_iif(skb),
+ };
struct sock *sk;
bool delivered = false;
__u8 hash;
struct net *net;
- saddr = &ipv6_hdr(skb)->saddr;
- daddr = saddr + 1;
hash = nexthdr & (RAW_HTABLE_SIZE - 1);
@@ -178,7 +181,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
goto out;
net = dev_net(skb->dev);
- sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, inet6_iif(skb));
+ sk = __raw_v6_lookup(net, sk, &params);
while (sk) {
int filtered;
@@ -221,8 +224,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
rawv6_rcv(sk, clone);
}
}
- sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
- inet6_iif(skb));
+ sk = __raw_v6_lookup(net, sk_next(sk), &params);
}
out:
read_unlock(&raw_v6_hashinfo.lock);
@@ -362,23 +364,26 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
u8 type, u8 code, int inner_offset, __be32 info)
{
struct sock *sk;
- int hash;
- const struct in6_addr *saddr, *daddr;
struct net *net;
+ int hash;
hash = nexthdr & (RAW_HTABLE_SIZE - 1);
read_lock(&raw_v6_hashinfo.lock);
sk = sk_head(&raw_v6_hashinfo.ht[hash]);
if (sk) {
+ struct sk_lookup params = {
+ .hnum = nexthdr,
+ .dif = inet6_iif(skb),
+ };
/* Note: ipv6_hdr(skb) != skb->data */
const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data;
- saddr = &ip6h->saddr;
- daddr = &ip6h->daddr;
+
+ params.daddr.ipv6 = &ip6h->saddr;
+ params.saddr.ipv6 = &ip6h->daddr;
net = dev_net(skb->dev);
- while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
- inet6_iif(skb)))) {
+ while ((sk = __raw_v6_lookup(net, sk, &params))) {
rawv6_err(sk, skb, NULL, type, code,
inner_offset, info);
sk = sk_next(sk);
--
2.1.4
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 08/10] net: Add sdif to sk_lookup
2017-07-25 15:38 [RFC PATCH 00/10] net: l3mdev: Support for sockets bound to enslaved device David Ahern
` (6 preceding siblings ...)
2017-07-25 15:38 ` [RFC PATCH 07/10] net: ipv6: Convert raw sockets to sk_lookup David Ahern
@ 2017-07-25 15:38 ` David Ahern
2017-07-25 15:38 ` [RFC PATCH 09/10] net: ipv4: Support for sockets bound to enslaved device David Ahern
2017-07-25 15:38 ` [RFC PATCH 10/10] net: ipv6: " David Ahern
9 siblings, 0 replies; 11+ messages in thread
From: David Ahern @ 2017-07-25 15:38 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
Add a second device index, sdif, to the socket lookup struct. sdif
will be the device index for devices enslaved to an l3mdev. It allows
the lookups to consider the enslaved device as well as the L3 master
device when searching for a socket.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/sock.h | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index a2db5fd30192..c5d93a4bcd0a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -507,23 +507,27 @@ struct sk_lookup {
unsigned short hnum;
int dif;
+ int sdif;
bool exact_dif;
};
-/* Compare sk_bound_dev_if to socket lookup dif
+/* Compare sk_bound_dev_if to socket lookup dif and sdif
* Returns:
* -1 exact dif required and not met
* 0 sk_bound_dev_if is either not set or does not match
- * 1 sk_bound_dev_if is set and matches dif
+ * 1 sk_bound_dev_if is set and matches dif or sdif
*/
static inline int sk_lookup_device_cmp(const struct sock *sk,
const struct sk_lookup *params)
{
+ bool dev_match = (sk->sk_bound_dev_if == params->dif ||
+ sk->sk_bound_dev_if == params->sdif);
+
/* exact_dif true == l3mdev case */
- if (params->exact_dif && sk->sk_bound_dev_if != params->dif)
+ if (params->exact_dif && !dev_match)
return -1;
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if == params->dif)
+ if (sk->sk_bound_dev_if && dev_match)
return 1;
return 0;
--
2.1.4
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 09/10] net: ipv4: Support for sockets bound to enslaved device
2017-07-25 15:38 [RFC PATCH 00/10] net: l3mdev: Support for sockets bound to enslaved device David Ahern
` (7 preceding siblings ...)
2017-07-25 15:38 ` [RFC PATCH 08/10] net: Add sdif " David Ahern
@ 2017-07-25 15:38 ` David Ahern
2017-07-25 15:38 ` [RFC PATCH 10/10] net: ipv6: " David Ahern
9 siblings, 0 replies; 11+ messages in thread
From: David Ahern @ 2017-07-25 15:38 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
Add support for sockets bound to a network interface enslaved to an
L3 Master device (e.g., VRF). Currently for VRF, skb->dev points to the
VRF device, meaning socket lookups only consider this device index. The
real ingress device index is saved to IPCB(skb)->iif and the VRF driver
marks the skb with IPSKB_L3SLAVE so that the real ingress device is known
to be an enslaved one without having to look up the iif.
Use those flags to add the enslaved device index to the socket lookup
and allow sk->sk_bound_dev_if to match either dif (VRF device) or sdif
(enslaved device).
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/linux/igmp.h | 3 ++-
include/net/inet_hashtables.h | 10 ++++++----
include/net/ip.h | 10 ++++++++++
include/net/tcp.h | 10 ++++++++++
net/ipv4/igmp.c | 6 ++++--
net/ipv4/inet_hashtables.c | 6 +++---
net/ipv4/raw.c | 7 +++++--
net/ipv4/tcp_ipv4.c | 6 ++++--
net/ipv4/udp.c | 11 ++++++++---
9 files changed, 52 insertions(+), 17 deletions(-)
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 97caf1821de8..f8231854b5d6 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -118,7 +118,8 @@ extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
struct ip_msfilter __user *optval, int __user *optlen);
extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
struct group_filter __user *optval, int __user *optlen);
-extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, int dif);
+extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt,
+ int dif, int sdif);
extern void ip_mc_init_dev(struct in_device *);
extern void ip_mc_destroy_dev(struct in_device *);
extern void ip_mc_up(struct in_device *);
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index fabb8dd8fdb1..201f29d3c157 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -259,22 +259,24 @@ static inline struct sock *inet_lookup_listener(struct net *net,
(((__force __u64)(__be32)(__daddr)) << 32) | \
((__force __u64)(__be32)(__saddr)))
#endif /* __BIG_ENDIAN */
-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
+#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
(((__sk)->sk_portpair == (__ports)) && \
((__sk)->sk_addrpair == (__cookie)) && \
(!(__sk)->sk_bound_dev_if || \
- ((__sk)->sk_bound_dev_if == (__dif))) && \
+ ((__sk)->sk_bound_dev_if == (__dif)) || \
+ ((__sk)->sk_bound_dev_if == (__sdif))) && \
net_eq(sock_net(__sk), (__net)))
#else /* 32-bit arch */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
const int __name __deprecated __attribute__((unused))
-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
+#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
(((__sk)->sk_portpair == (__ports)) && \
((__sk)->sk_daddr == (__saddr)) && \
((__sk)->sk_rcv_saddr == (__daddr)) && \
(!(__sk)->sk_bound_dev_if || \
- ((__sk)->sk_bound_dev_if == (__dif))) && \
+ ((__sk)->sk_bound_dev_if == (__dif)) || \
+ ((__sk)->sk_bound_dev_if == (__sdif))) && \
net_eq(sock_net(__sk), (__net)))
#endif /* 64-bit arch */
diff --git a/include/net/ip.h b/include/net/ip.h
index 821cedcc8e73..e10da8814dba 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -78,6 +78,16 @@ struct ipcm_cookie {
#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
#define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb))
+/* return enslaved device index if relevant */
+static inline int ip_sdif(struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
+ return IPCB(skb)->iif;
+#endif
+ return 0;
+}
+
struct ip_ra_chain {
struct ip_ra_chain __rcu *next;
struct sock *sk;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4f056ea79df2..1a66ab82988b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -861,6 +861,16 @@ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
return false;
}
+/* TCP_SKB_CB reference means this can not be used from early demux */
+static inline int tcp_v4_sdif(struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
+ return TCP_SKB_CB(skb)->header.h4.iif;
+#endif
+ return 0;
+}
+
/* Due to TSO, an SKB can be composed of multiple actual
* packets. To keep these tracked properly, we use this.
*/
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 28f14afd0dd3..0d5fb47743bf 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2549,7 +2549,8 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
/*
* check if a multicast source filter allows delivery for a given <src,dst,intf>
*/
-int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
+int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr,
+ int dif, int sdif)
{
struct inet_sock *inet = inet_sk(sk);
struct ip_mc_socklist *pmc;
@@ -2564,7 +2565,8 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
rcu_read_lock();
for_each_pmc_rcu(inet, pmc) {
if (pmc->multi.imr_multiaddr.s_addr == loc_addr &&
- pmc->multi.imr_ifindex == dif)
+ (pmc->multi.imr_ifindex == dif ||
+ pmc->multi.imr_ifindex == sdif))
break;
}
ret = inet->mc_all;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e581e200d01d..764da4302dac 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -291,12 +291,12 @@ struct sock *__inet_lookup_established(struct net *net,
if (sk->sk_hash != hash)
continue;
if (likely(INET_MATCH(sk, net, acookie, saddr, daddr,
- ports, params->dif))) {
+ ports, params->dif, params->sdif))) {
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
if (unlikely(!INET_MATCH(sk, net, acookie,
saddr, daddr, ports,
- params->dif))) {
+ params->dif, params->sdif))) {
sock_gen_put(sk);
goto begin;
}
@@ -345,7 +345,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
continue;
if (likely(INET_MATCH(sk2, net, acookie,
- saddr, daddr, ports, dif))) {
+ saddr, daddr, ports, dif, 0))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
if (twsk_unique(sk, sk2, twp))
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 4da5d87a61a5..a94f8f115b6e 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -132,7 +132,8 @@ struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
bool dev_match;
dev_match = (!sk->sk_bound_dev_if ||
- sk->sk_bound_dev_if == params->dif);
+ sk->sk_bound_dev_if == params->dif ||
+ sk->sk_bound_dev_if == params->sdif);
if (net_eq(sock_net(sk), net) &&
inet->inet_num == params->hnum &&
@@ -186,6 +187,7 @@ static int __raw_v4_input(struct sk_buff *skb, const struct iphdr *iph,
.daddr.ipv4 = iph->daddr,
.hnum = iph->protocol,
.dif = skb->dev->ifindex,
+ .sdif = ip_sdif(skb),
};
int delivered = 0;
struct sock *sk;
@@ -195,7 +197,7 @@ static int __raw_v4_input(struct sk_buff *skb, const struct iphdr *iph,
delivered = 1;
if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) &&
ip_mc_sf_allow(sk, iph->daddr, iph->saddr,
- skb->dev->ifindex)) {
+ skb->dev->ifindex, params.sdif)) {
struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
/* Not releasing hash table! */
@@ -316,6 +318,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
struct sk_lookup params = {
.hnum = protocol,
.dif = skb->dev->ifindex,
+ .sdif = ip_sdif(skb),
};
iph = (const struct iphdr *)skb->data;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 89a0d166e677..d0f397dab3ed 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1664,7 +1664,9 @@ EXPORT_SYMBOL(tcp_filter);
int tcp_v4_rcv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
- struct sk_lookup params = { };
+ struct sk_lookup params = {
+ .sdif = ip_sdif(skb),
+ };
const struct iphdr *iph;
const struct tcphdr *th;
bool refcounted;
@@ -1846,8 +1848,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
.daddr.ipv4 = iph->daddr,
.sport = th->source,
.dport = th->dest,
- .hnum = ntohs(th->dest),
.dif = inet_iif(skb),
+ .sdif = tcp_v4_sdif(skb),
};
struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
&tcp_hashinfo, skb,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 132a8f070d16..5c9fffed9c4a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -485,6 +485,7 @@ struct sock *__udp4_lib_lookup(struct net *net, struct sk_lookup *params,
u32 hash = 0;
params->hnum = hnum;
+ params->sdif = ip_sdif(skb);
params->exact_dif = udp_lib_exact_dif_match(net, skb);
if (hslot->count > 10) {
@@ -597,9 +598,10 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
(inet->inet_dport != params->sport && inet->inet_dport) ||
(inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
ipv6_only_sock(sk) ||
- (sk->sk_bound_dev_if && sk->sk_bound_dev_if != params->dif))
+ (sk->sk_bound_dev_if && sk->sk_bound_dev_if != params->dif &&
+ sk->sk_bound_dev_if != params->sdif))
return false;
- if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, params->dif))
+ if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, params->dif, params->sdif))
return false;
return true;
}
@@ -1970,6 +1972,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
.dport = uh->dest,
.hnum = hnum,
.dif = skb->dev->ifindex,
+ .sdif = ip_sdif(skb),
};
if (use_hash2) {
@@ -2210,7 +2213,8 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
if (INET_MATCH(sk, net, acookie, params->saddr.ipv4,
- params->daddr.ipv4, ports, params->dif))
+ params->daddr.ipv4, ports, params->dif,
+ params->sdif))
return sk;
/* Only check first socket in chain */
break;
@@ -2223,6 +2227,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
struct net *net = dev_net(skb->dev);
struct sk_lookup params = {
.dif = skb->dev->ifindex,
+ .sdif = ip_sdif(skb),
};
const struct iphdr *iph;
const struct udphdr *uh;
--
2.1.4
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 10/10] net: ipv6: Support for sockets bound to enslaved device
2017-07-25 15:38 [RFC PATCH 00/10] net: l3mdev: Support for sockets bound to enslaved device David Ahern
` (8 preceding siblings ...)
2017-07-25 15:38 ` [RFC PATCH 09/10] net: ipv4: Support for sockets bound to enslaved device David Ahern
@ 2017-07-25 15:38 ` David Ahern
9 siblings, 0 replies; 11+ messages in thread
From: David Ahern @ 2017-07-25 15:38 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
Add support for sockets bound to a network interface enslaved to an
L3 Master device (e.g., VRF). Currently for VRF, skb->dev points to the
VRF device, meaning socket lookups only consider this device index. The
real ingress device index is saved to IP6CB(skb)->iif, and the VRF driver
marks the skb with IP6SKB_L3SLAVE so the stack knows that the real ingress
device is an enslaved one without having to look up the iif.
Use that flag and the saved iif to add the enslaved device index to the
socket lookup and allow sk->sk_bound_dev_if to match either dif (VRF
device) or sdif (enslaved device).
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/linux/ipv6.h | 8 ++++++++
include/net/inet6_hashtables.h | 5 +++--
include/net/tcp.h | 7 +++++++
net/ipv6/inet6_hashtables.c | 6 +++---
net/ipv6/raw.c | 5 ++++-
net/ipv6/tcp_ipv6.c | 3 +++
net/ipv6/udp.c | 8 ++++++--
7 files changed, 34 insertions(+), 8 deletions(-)
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index e1b442996f81..094357907b45 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -153,6 +153,14 @@ static inline int inet6_iif(const struct sk_buff *skb)
}
/* can not be used in TCP layer after tcp_v6_fill_cb */
+static inline int inet6_sdif(const struct sk_buff *skb)
+{
+	/* A NULL skb (caller has no packet - TODO confirm) yields sdif 0 */
+	bool l3_slave = skb && ipv6_l3mdev_skb(IP6CB(skb)->flags);
+	return l3_slave ? IP6CB(skb)->iif : 0;
+}
+
+/* can not be used in TCP layer after tcp_v6_fill_cb */
static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb)
{
#if defined(CONFIG_NET_L3_MASTER_DEV)
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 15db41272ff2..0fc5a2fe4ad3 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -94,13 +94,14 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
int inet6_hash(struct sock *sk);
#endif /* IS_ENABLED(CONFIG_IPV6) */
-#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \
+#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif, __sdif) \
(((__sk)->sk_portpair == (__ports)) && \
((__sk)->sk_family == AF_INET6) && \
ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \
ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \
(!(__sk)->sk_bound_dev_if || \
- ((__sk)->sk_bound_dev_if == (__dif))) && \
+ ((__sk)->sk_bound_dev_if == (__dif)) || \
+ ((__sk)->sk_bound_dev_if == (__sdif))) && \
net_eq(sock_net(__sk), (__net)))
#endif /* _INET6_HASHTABLES_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1a66ab82988b..3bc0bc4daa05 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -848,6 +848,13 @@ static inline int tcp_v6_iif(const struct sk_buff *skb)
return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif;
}
+
+static inline int tcp_v6_sdif(const struct sk_buff *skb)
+{
+	/* h6.iif names the enslaved device only when the l3 slave flag is set */
+	return ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags) ?
+		TCP_SKB_CB(skb)->header.h6.iif : 0;
+}
#endif
/* TCP_SKB_CB reference means this can not be used from early demux */
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 878c03094f2e..2af34af36110 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -74,13 +74,13 @@ struct sock *__inet6_lookup_established(struct net *net,
if (sk->sk_hash != hash)
continue;
if (!INET6_MATCH(sk, net, saddr, daddr, ports,
- params->dif))
+ params->dif, params->sdif))
continue;
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports,
- params->dif))) {
+ params->dif, params->sdif))) {
sock_gen_put(sk);
goto begin;
}
@@ -205,7 +205,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
continue;
if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports,
- dif))) {
+ dif, 0))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
if (twsk_unique(sk, sk2, twp))
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 51e651f18ffb..bab365214d17 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -87,7 +87,8 @@ struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
continue;
if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != params->dif)
+ sk->sk_bound_dev_if != params->dif &&
+ sk->sk_bound_dev_if != params->sdif)
continue;
if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
@@ -165,6 +166,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
.daddr.ipv6 = &ipv6_hdr(skb)->daddr,
.hnum = nexthdr,
.dif = inet6_iif(skb),
+ .sdif = inet6_sdif(skb),
};
struct sock *sk;
bool delivered = false;
@@ -375,6 +377,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
struct sk_lookup params = {
.hnum = nexthdr,
.dif = inet6_iif(skb),
+ .sdif = inet6_sdif(skb),
};
/* Note: ipv6_hdr(skb) != skb->data */
const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 154886daba7b..55a7256211ca 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -917,6 +917,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
.sport = th->source,
.hnum = ntohs(th->source),
.dif = tcp_v6_iif(skb),
+ .sdif = tcp_v6_sdif(skb),
};
/*
@@ -1414,6 +1415,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
struct net *net = dev_net(skb->dev);
struct sk_lookup params = {
.dif = inet6_iif(skb),
+ .sdif = inet6_sdif(skb),
};
if (skb->pkt_type != PACKET_HOST)
@@ -1577,6 +1579,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
.sport = th->source,
.hnum = ntohs(th->dest),
.dif = tcp_v6_iif(skb),
+ .sdif = tcp_v6_sdif(skb),
};
struct sock *sk2;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5c4fdbe52c24..bb72a480d169 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -220,6 +220,7 @@ struct sock *__udp6_lib_lookup(struct net *net, struct sk_lookup *params,
u32 hash = 0;
params->hnum = hnum;
+ params->sdif = inet6_sdif(skb);
params->exact_dif = udp6_lib_exact_dif_match(net, skb);
if (hslot->count > 10) {
@@ -673,7 +674,8 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
(inet->inet_dport && inet->inet_dport != params->sport) ||
(!ipv6_addr_any(&sk->sk_v6_daddr) &&
!ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
- (sk->sk_bound_dev_if && sk->sk_bound_dev_if != params->dif) ||
+ (sk->sk_bound_dev_if && sk->sk_bound_dev_if != params->dif &&
+ sk->sk_bound_dev_if != params->sdif) ||
(!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)))
return false;
@@ -715,6 +717,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
.dport = uh->dest,
.hnum = hnum,
.dif = inet6_iif(skb),
+ .sdif = inet6_sdif(skb),
};
if (use_hash2) {
@@ -893,7 +896,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
if (sk->sk_state == TCP_ESTABLISHED &&
INET6_MATCH(sk, net, params->saddr.ipv6,
params->daddr.ipv6, ports,
- params->dif))
+ params->dif, params->sdif))
return sk;
/* Only check first socket in chain */
@@ -910,6 +913,7 @@ static void udp_v6_early_demux(struct sk_buff *skb)
struct dst_entry *dst;
struct sk_lookup params = {
.dif = skb->dev->ifindex,
+ .sdif = inet6_sdif(skb),
};
if (skb->pkt_type != PACKET_HOST)
--
2.1.4
^ permalink raw reply related [flat|nested] 11+ messages in thread