* [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs
@ 2015-07-28 23:02 Tom Herbert
2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
` (2 more replies)
0 siblings, 3 replies; 15+ messages in thread
From: Tom Herbert @ 2015-07-28 23:02 UTC (permalink / raw)
To: davem, netdev; +Cc: kernel-team
This patch set implements a common function to simply set sk_txhash to
a random number instead of going through the trouble to call flow
dissector. From dst_negative_advice we now reset the sk_txhash in hopes
of finding a better ECMP path through the network. Changing sk_txhash
affects:
- IPv6 flow label and UDP source port which affect ECMP in the network
- Local EMCP route selection (pending changes to use sk_txhash)
Tom Herbert (2):
net: Set sk_txhash from a random number
net: Recompute sk_txhash on negative routing advice
include/net/ip.h | 16 ----------------
include/net/ipv6.h | 19 -------------------
include/net/sock.h | 16 ++++++++++++++++
net/ipv4/datagram.c | 2 +-
net/ipv4/tcp_ipv4.c | 4 ++--
net/ipv6/datagram.c | 2 +-
net/ipv6/tcp_ipv6.c | 4 ++--
7 files changed, 22 insertions(+), 41 deletions(-)
--
1.8.1
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH net-next 1/2] net: Set sk_txhash from a random number
2015-07-28 23:02 [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs Tom Herbert
@ 2015-07-28 23:02 ` Tom Herbert
2015-07-29 9:13 ` Thomas Graf
2015-12-08 8:33 ` [net-next,1/2] " Alexander Drozdov
2015-07-28 23:02 ` [PATCH net-next 2/2] net: Recompute sk_txhash on negative routing advice Tom Herbert
2015-07-30 5:44 ` [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs David Miller
2 siblings, 2 replies; 15+ messages in thread
From: Tom Herbert @ 2015-07-28 23:02 UTC (permalink / raw)
To: davem, netdev; +Cc: kernel-team
This patch creates sk_set_txhash and eliminates protocol specific
inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
random number instead of performing flow dissection. sk_set_txash
is also allowed to be called multiple times for the same socket,
we'll need this when redoing the hash for negative routing advice.
Signed-off-by: Tom Herbert <tom@herbertland.com>
---
include/net/ip.h | 16 ----------------
include/net/ipv6.h | 19 -------------------
include/net/sock.h | 8 ++++++++
net/ipv4/datagram.c | 2 +-
net/ipv4/tcp_ipv4.c | 4 ++--
net/ipv6/datagram.c | 2 +-
net/ipv6/tcp_ipv6.c | 4 ++--
7 files changed, 14 insertions(+), 41 deletions(-)
diff --git a/include/net/ip.h b/include/net/ip.h
index d5fe9f2..bee5f35 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -370,22 +370,6 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow,
flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
}
-static inline void inet_set_txhash(struct sock *sk)
-{
- struct inet_sock *inet = inet_sk(sk);
- struct flow_keys keys;
-
- memset(&keys, 0, sizeof(keys));
-
- keys.addrs.v4addrs.src = inet->inet_saddr;
- keys.addrs.v4addrs.dst = inet->inet_daddr;
- keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- keys.ports.src = inet->inet_sport;
- keys.ports.dst = inet->inet_dport;
-
- sk->sk_txhash = flow_hash_from_keys(&keys);
-}
-
static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
{
const struct iphdr *iph = skb_gro_network_header(skb);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 82dbdb0..7c79798 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -707,25 +707,6 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
}
#if IS_ENABLED(CONFIG_IPV6)
-static inline void ip6_set_txhash(struct sock *sk)
-{
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct flow_keys keys;
-
- memset(&keys, 0, sizeof(keys));
-
- memcpy(&keys.addrs.v6addrs.src, &np->saddr,
- sizeof(keys.addrs.v6addrs.src));
- memcpy(&keys.addrs.v6addrs.dst, &sk->sk_v6_daddr,
- sizeof(keys.addrs.v6addrs.dst));
- keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
- keys.ports.src = inet->inet_sport;
- keys.ports.dst = inet->inet_dport;
-
- sk->sk_txhash = flow_hash_from_keys(&keys);
-}
-
static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
__be32 flowlabel, bool autolabel)
{
diff --git a/include/net/sock.h b/include/net/sock.h
index 4353ef7..fe735c4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1687,6 +1687,14 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
kuid_t sock_i_uid(struct sock *sk);
unsigned long sock_i_ino(struct sock *sk);
+static inline void sk_set_txhash(struct sock *sk)
+{
+ sk->sk_txhash = prandom_u32();
+
+ if (unlikely(!sk->sk_txhash))
+ sk->sk_txhash = 1;
+}
+
static inline struct dst_entry *
__sk_dst_get(struct sock *sk)
{
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 574fad9..f915abf 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -74,7 +74,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
inet->inet_daddr = fl4->daddr;
inet->inet_dport = usin->sin_port;
sk->sk_state = TCP_ESTABLISHED;
- inet_set_txhash(sk);
+ sk_set_txhash(sk);
inet->inet_id = jiffies;
sk_dst_set(sk, &rt->dst);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 486ba96..d27eb54 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -222,7 +222,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (err)
goto failure;
- inet_set_txhash(sk);
+ sk_set_txhash(sk);
rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
inet->inet_sport, inet->inet_dport, sk);
@@ -1277,7 +1277,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->rcv_tos = ip_hdr(skb)->tos;
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- inet_set_txhash(newsk);
+ sk_set_txhash(newsk);
if (inet_opt)
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
newinet->inet_id = newtp->write_seq ^ jiffies;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 2572a32..9aadd57 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -199,7 +199,7 @@ ipv4_connected:
NULL);
sk->sk_state = TCP_ESTABLISHED;
- ip6_set_txhash(sk);
+ sk_set_txhash(sk);
out:
fl6_sock_release(flowlabel);
return err;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d540846..52dd0d9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -276,7 +276,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (err)
goto late_failure;
- ip6_set_txhash(sk);
+ sk_set_txhash(sk);
if (!tp->write_seq && likely(!tp->repair))
tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
@@ -1090,7 +1090,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
newsk->sk_bound_dev_if = ireq->ir_iif;
- ip6_set_txhash(newsk);
+ sk_set_txhash(newsk);
/* Now IPv6 options...
--
1.8.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next 2/2] net: Recompute sk_txhash on negative routing advice
2015-07-28 23:02 [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs Tom Herbert
2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
@ 2015-07-28 23:02 ` Tom Herbert
2015-07-30 5:44 ` [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs David Miller
2 siblings, 0 replies; 15+ messages in thread
From: Tom Herbert @ 2015-07-28 23:02 UTC (permalink / raw)
To: davem, netdev; +Cc: kernel-team
When a connection is failing a transport protocol calls
dst_negative_advice to try to get a better route. This patch includes
changing the sk_txhash in that function. This provides a rudimentary
method to try to find a different path in the network since sk_txhash
affects ECMP on the local host and through the network (via flow labels
or UDP source port in encapsulation).
Signed-off-by: Tom Herbert <tom@herbertland.com>
---
include/net/sock.h | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/include/net/sock.h b/include/net/sock.h
index fe735c4..24aa75c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1695,6 +1695,12 @@ static inline void sk_set_txhash(struct sock *sk)
sk->sk_txhash = 1;
}
+static inline void sk_rethink_txhash(struct sock *sk)
+{
+ if (sk->sk_txhash)
+ sk_set_txhash(sk);
+}
+
static inline struct dst_entry *
__sk_dst_get(struct sock *sk)
{
@@ -1719,6 +1725,8 @@ static inline void dst_negative_advice(struct sock *sk)
{
struct dst_entry *ndst, *dst = __sk_dst_get(sk);
+ sk_rethink_txhash(sk);
+
if (dst && dst->ops->negative_advice) {
ndst = dst->ops->negative_advice(dst);
--
1.8.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
@ 2015-07-29 9:13 ` Thomas Graf
2015-07-29 9:29 ` Eric Dumazet
2015-12-08 8:33 ` [net-next,1/2] " Alexander Drozdov
1 sibling, 1 reply; 15+ messages in thread
From: Thomas Graf @ 2015-07-29 9:13 UTC (permalink / raw)
To: Tom Herbert; +Cc: davem, netdev, kernel-team
On 07/28/15 at 04:02pm, Tom Herbert wrote:
> This patch creates sk_set_txhash and eliminates protocol specific
> inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
> random number instead of performing flow dissection. sk_set_txash
> is also allowed to be called multiple times for the same socket,
> we'll need this when redoing the hash for negative routing advice.
>
> Signed-off-by: Tom Herbert <tom@herbertland.com>
Doesn't this break TX hashing with SO_REUSEPORT?
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
2015-07-29 9:13 ` Thomas Graf
@ 2015-07-29 9:29 ` Eric Dumazet
2015-07-29 9:54 ` Thomas Graf
0 siblings, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2015-07-29 9:29 UTC (permalink / raw)
To: Thomas Graf; +Cc: Tom Herbert, davem, netdev, kernel-team
On Wed, 2015-07-29 at 11:13 +0200, Thomas Graf wrote:
> On 07/28/15 at 04:02pm, Tom Herbert wrote:
> > This patch creates sk_set_txhash and eliminates protocol specific
> > inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
> > random number instead of performing flow dissection. sk_set_txash
> > is also allowed to be called multiple times for the same socket,
> > we'll need this when redoing the hash for negative routing advice.
> >
> > Signed-off-by: Tom Herbert <tom@herbertland.com>
>
> Doesn't this break TX hashing with SO_REUSEPORT?
AFAIK nothing uses sk_txhash yet.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
2015-07-29 9:29 ` Eric Dumazet
@ 2015-07-29 9:54 ` Thomas Graf
2015-07-29 10:06 ` Eric Dumazet
2015-07-29 15:58 ` Tom Herbert
0 siblings, 2 replies; 15+ messages in thread
From: Thomas Graf @ 2015-07-29 9:54 UTC (permalink / raw)
To: Eric Dumazet; +Cc: Tom Herbert, davem, netdev, kernel-team
On 07/29/15 at 11:29am, Eric Dumazet wrote:
> On Wed, 2015-07-29 at 11:13 +0200, Thomas Graf wrote:
> > On 07/28/15 at 04:02pm, Tom Herbert wrote:
> > > This patch creates sk_set_txhash and eliminates protocol specific
> > > inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
> > > random number instead of performing flow dissection. sk_set_txash
> > > is also allowed to be called multiple times for the same socket,
> > > we'll need this when redoing the hash for negative routing advice.
> > >
> > > Signed-off-by: Tom Herbert <tom@herbertland.com>
> >
> > Doesn't this break TX hashing with SO_REUSEPORT?
>
>
> AFAIK nothing uses sk_txhash yet.
skb_set_hash_from_sk()
skb_get_hash()
Am I misreading this? I'm not using SO_REUSEPORT and it might be OK
to assume that different sockets may go to different queues even if
the L4 tuple is identical.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
2015-07-29 9:54 ` Thomas Graf
@ 2015-07-29 10:06 ` Eric Dumazet
2015-07-29 10:47 ` Thomas Graf
2015-07-29 15:58 ` Tom Herbert
1 sibling, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2015-07-29 10:06 UTC (permalink / raw)
To: Thomas Graf; +Cc: Tom Herbert, davem, netdev, kernel-team
On Wed, 2015-07-29 at 11:54 +0200, Thomas Graf wrote:
> skb_set_hash_from_sk()
> skb_get_hash()
>
> Am I misreading this? I'm not using SO_REUSEPORT and it might be OK
> to assume that different sockets may go to different queues even if
> the L4 tuple is identical.
skb_set_hash_from_sk() sets skb->hash in output path. Nothing uses it
then later. bonding uses its own hash functions.
SO_REUSEPORT is on input path, and uses its own hash anyway.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
2015-07-29 10:06 ` Eric Dumazet
@ 2015-07-29 10:47 ` Thomas Graf
0 siblings, 0 replies; 15+ messages in thread
From: Thomas Graf @ 2015-07-29 10:47 UTC (permalink / raw)
To: Eric Dumazet; +Cc: Tom Herbert, davem, netdev, kernel-team
On 07/29/15 at 12:06pm, Eric Dumazet wrote:
> On Wed, 2015-07-29 at 11:54 +0200, Thomas Graf wrote:
>
> > skb_set_hash_from_sk()
> > skb_get_hash()
> >
> > Am I misreading this? I'm not using SO_REUSEPORT and it might be OK
> > to assume that different sockets may go to different queues even if
> > the L4 tuple is identical.
>
> skb_set_hash_from_sk() sets skb->hash in output path. Nothing uses it
> then later. bonding uses its own hash functions.
>
> SO_REUSEPORT is on input path, and uses its own hash anyway.
Yes, I'm talking about the output path only but after further reading,
the only case that could be a problem is if two SO_REUSEPORT sockets
connect to the same destination address and port which will be prevented
by connect anyway.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
2015-07-29 9:54 ` Thomas Graf
2015-07-29 10:06 ` Eric Dumazet
@ 2015-07-29 15:58 ` Tom Herbert
2015-07-29 20:02 ` Thomas Graf
1 sibling, 1 reply; 15+ messages in thread
From: Tom Herbert @ 2015-07-29 15:58 UTC (permalink / raw)
To: Thomas Graf
Cc: Eric Dumazet, David S. Miller, Linux Kernel Network Developers,
Kernel Team
On Wed, Jul 29, 2015 at 2:54 AM, Thomas Graf <tgraf@suug.ch> wrote:
> On 07/29/15 at 11:29am, Eric Dumazet wrote:
>> On Wed, 2015-07-29 at 11:13 +0200, Thomas Graf wrote:
>> > On 07/28/15 at 04:02pm, Tom Herbert wrote:
>> > > This patch creates sk_set_txhash and eliminates protocol specific
>> > > inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
>> > > random number instead of performing flow dissection. sk_set_txash
>> > > is also allowed to be called multiple times for the same socket,
>> > > we'll need this when redoing the hash for negative routing advice.
>> > >
>> > > Signed-off-by: Tom Herbert <tom@herbertland.com>
>> >
>> > Doesn't this break TX hashing with SO_REUSEPORT?
>>
>>
>> AFAIK nothing uses sk_txhash yet.
>
> skb_set_hash_from_sk()
> skb_get_hash()
>
> Am I misreading this? I'm not using SO_REUSEPORT and it might be OK
> to assume that different sockets may go to different queues even if
> the L4 tuple is identical.
Hi Thomas,
The salient property of both sk_txhash and skb->hash is that they
provide a uniform distribution over flows. It is incorrect to assume
that either of these immutable during the lifetime of a flow, so yes
this means that packets of a flow may go to different receive queues
when hashes change. SO_REUSEPORT is a process in the receive path but
uses ehashfn over the ports. But even with SO_REUSEPORT we provide no
guarantee that packets of a "flow" will always hit the same socket,
the hashing is not consistent when new reuseport sockets are added or
removed-- this is actually a long standing issue with SO_REUSEPORT in
the TCP case since it is possible to orphan connections in SYN-RECV. I
believe Eric was working toward fixing that, so maybe in the future we
can use skb->hash if it is a savings.
Thanks,
Tom
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
2015-07-29 15:58 ` Tom Herbert
@ 2015-07-29 20:02 ` Thomas Graf
0 siblings, 0 replies; 15+ messages in thread
From: Thomas Graf @ 2015-07-29 20:02 UTC (permalink / raw)
To: Tom Herbert
Cc: Eric Dumazet, David S. Miller, Linux Kernel Network Developers,
Kernel Team
On 07/29/15 at 08:58am, Tom Herbert wrote:
> The salient property of both sk_txhash and skb->hash is that they
> provide a uniform distribution over flows. It is incorrect to assume
> that either of these immutable during the lifetime of a flow, so yes
> this means that packets of a flow may go to different receive queues
> when hashes change. SO_REUSEPORT is a process in the receive path but
> uses ehashfn over the ports. But even with SO_REUSEPORT we provide no
> guarantee that packets of a "flow" will always hit the same socket,
> the hashing is not consistent when new reuseport sockets are added or
> removed-- this is actually a long standing issue with SO_REUSEPORT in
> the TCP case since it is possible to orphan connections in SYN-RECV. I
> believe Eric was working toward fixing that, so maybe in the future we
> can use skb->hash if it is a savings.
Thanks for the explanation. I have no objections to the changes then.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs
2015-07-28 23:02 [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs Tom Herbert
2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
2015-07-28 23:02 ` [PATCH net-next 2/2] net: Recompute sk_txhash on negative routing advice Tom Herbert
@ 2015-07-30 5:44 ` David Miller
2 siblings, 0 replies; 15+ messages in thread
From: David Miller @ 2015-07-30 5:44 UTC (permalink / raw)
To: tom; +Cc: netdev, kernel-team
From: Tom Herbert <tom@herbertland.com>
Date: Tue, 28 Jul 2015 16:02:04 -0700
> This patch set implements a common function to simply set sk_txhash to
> a random number instead of going through the trouble to call flow
> dissector. From dst_negative_advice we now reset the sk_txhash in hopes
> of finding a better ECMP path through the network. Changing sk_txhash
> affects:
> - IPv6 flow label and UDP source port which affect ECMP in the network
> - Local EMCP route selection (pending changes to use sk_txhash)
This looks fine, series applied, thanks Tom.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [net-next,1/2] net: Set sk_txhash from a random number
2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
2015-07-29 9:13 ` Thomas Graf
@ 2015-12-08 8:33 ` Alexander Drozdov
2015-12-08 13:15 ` Eric Dumazet
1 sibling, 1 reply; 15+ messages in thread
From: Alexander Drozdov @ 2015-12-08 8:33 UTC (permalink / raw)
To: Tom Herbert, davem, netdev; +Cc: kernel-team
29.07.2015 02:02, Tom Herbert wrote:
> This patch creates sk_set_txhash and eliminates protocol specific
> inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
> random number instead of performing flow dissection. sk_set_txash
> is also allowed to be called multiple times for the same socket,
> we'll need this when redoing the hash for negative routing advice.
It seems that this patch and some previous txhash-related
ones break af_packet hash features for outgoing packets:
- PACKET_FANOUT_HASH
- TP_FT_REQ_FILL_RXHASH
af_packet now thinks that hashes for for incoming and outgoing
packets of the same TCP stream differ. That is true for TCP
sessions initiated by the host.
>
> Signed-off-by: Tom Herbert <tom@herbertland.com>
> ---
> include/net/ip.h | 16 ----------------
> include/net/ipv6.h | 19 -------------------
> include/net/sock.h | 8 ++++++++
> net/ipv4/datagram.c | 2 +-
> net/ipv4/tcp_ipv4.c | 4 ++--
> net/ipv6/datagram.c | 2 +-
> net/ipv6/tcp_ipv6.c | 4 ++--
> 7 files changed, 14 insertions(+), 41 deletions(-)
>
> diff --git a/include/net/ip.h b/include/net/ip.h
> index d5fe9f2..bee5f35 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -370,22 +370,6 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow,
> flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
> }
>
> -static inline void inet_set_txhash(struct sock *sk)
> -{
> - struct inet_sock *inet = inet_sk(sk);
> - struct flow_keys keys;
> -
> - memset(&keys, 0, sizeof(keys));
> -
> - keys.addrs.v4addrs.src = inet->inet_saddr;
> - keys.addrs.v4addrs.dst = inet->inet_daddr;
> - keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
> - keys.ports.src = inet->inet_sport;
> - keys.ports.dst = inet->inet_dport;
> -
> - sk->sk_txhash = flow_hash_from_keys(&keys);
> -}
> -
> static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
> {
> const struct iphdr *iph = skb_gro_network_header(skb);
> diff --git a/include/net/ipv6.h b/include/net/ipv6.h
> index 82dbdb0..7c79798 100644
> --- a/include/net/ipv6.h
> +++ b/include/net/ipv6.h
> @@ -707,25 +707,6 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
> }
>
> #if IS_ENABLED(CONFIG_IPV6)
> -static inline void ip6_set_txhash(struct sock *sk)
> -{
> - struct inet_sock *inet = inet_sk(sk);
> - struct ipv6_pinfo *np = inet6_sk(sk);
> - struct flow_keys keys;
> -
> - memset(&keys, 0, sizeof(keys));
> -
> - memcpy(&keys.addrs.v6addrs.src, &np->saddr,
> - sizeof(keys.addrs.v6addrs.src));
> - memcpy(&keys.addrs.v6addrs.dst, &sk->sk_v6_daddr,
> - sizeof(keys.addrs.v6addrs.dst));
> - keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
> - keys.ports.src = inet->inet_sport;
> - keys.ports.dst = inet->inet_dport;
> -
> - sk->sk_txhash = flow_hash_from_keys(&keys);
> -}
> -
> static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
> __be32 flowlabel, bool autolabel)
> {
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 4353ef7..fe735c4 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1687,6 +1687,14 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
> kuid_t sock_i_uid(struct sock *sk);
> unsigned long sock_i_ino(struct sock *sk);
>
> +static inline void sk_set_txhash(struct sock *sk)
> +{
> + sk->sk_txhash = prandom_u32();
> +
> + if (unlikely(!sk->sk_txhash))
> + sk->sk_txhash = 1;
> +}
> +
> static inline struct dst_entry *
> __sk_dst_get(struct sock *sk)
> {
> diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
> index 574fad9..f915abf 100644
> --- a/net/ipv4/datagram.c
> +++ b/net/ipv4/datagram.c
> @@ -74,7 +74,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
> inet->inet_daddr = fl4->daddr;
> inet->inet_dport = usin->sin_port;
> sk->sk_state = TCP_ESTABLISHED;
> - inet_set_txhash(sk);
> + sk_set_txhash(sk);
> inet->inet_id = jiffies;
>
> sk_dst_set(sk, &rt->dst);
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 486ba96..d27eb54 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -222,7 +222,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
> if (err)
> goto failure;
>
> - inet_set_txhash(sk);
> + sk_set_txhash(sk);
>
> rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
> inet->inet_sport, inet->inet_dport, sk);
> @@ -1277,7 +1277,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
> newinet->mc_ttl = ip_hdr(skb)->ttl;
> newinet->rcv_tos = ip_hdr(skb)->tos;
> inet_csk(newsk)->icsk_ext_hdr_len = 0;
> - inet_set_txhash(newsk);
> + sk_set_txhash(newsk);
> if (inet_opt)
> inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
> newinet->inet_id = newtp->write_seq ^ jiffies;
> diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
> index 2572a32..9aadd57 100644
> --- a/net/ipv6/datagram.c
> +++ b/net/ipv6/datagram.c
> @@ -199,7 +199,7 @@ ipv4_connected:
> NULL);
>
> sk->sk_state = TCP_ESTABLISHED;
> - ip6_set_txhash(sk);
> + sk_set_txhash(sk);
> out:
> fl6_sock_release(flowlabel);
> return err;
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index d540846..52dd0d9 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -276,7 +276,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
> if (err)
> goto late_failure;
>
> - ip6_set_txhash(sk);
> + sk_set_txhash(sk);
>
> if (!tp->write_seq && likely(!tp->repair))
> tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
> @@ -1090,7 +1090,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
> newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
> newsk->sk_bound_dev_if = ireq->ir_iif;
>
> - ip6_set_txhash(newsk);
> + sk_set_txhash(newsk);
>
> /* Now IPv6 options...
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [net-next,1/2] net: Set sk_txhash from a random number
2015-12-08 8:33 ` [net-next,1/2] " Alexander Drozdov
@ 2015-12-08 13:15 ` Eric Dumazet
2015-12-08 16:33 ` Tom Herbert
0 siblings, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2015-12-08 13:15 UTC (permalink / raw)
To: Alexander Drozdov; +Cc: Tom Herbert, davem, netdev, kernel-team
On Tue, 2015-12-08 at 11:33 +0300, Alexander Drozdov wrote:
> 29.07.2015 02:02, Tom Herbert wrote:
> > This patch creates sk_set_txhash and eliminates protocol specific
> > inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
> > random number instead of performing flow dissection. sk_set_txash
> > is also allowed to be called multiple times for the same socket,
> > we'll need this when redoing the hash for negative routing advice.
> It seems that this patch and some previous txhash-related
> ones break af_packet hash features for outgoing packets:
> - PACKET_FANOUT_HASH
> - TP_FT_REQ_FILL_RXHASH
>
> af_packet now thinks that hashes for for incoming and outgoing
> packets of the same TCP stream differ. That is true for TCP
> sessions initiated by the host.
There never has been such guarantee. Even rx hashes for a single TCP
flow can differ, if packets are received on two different NIC with
different RSSS keys.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [net-next,1/2] net: Set sk_txhash from a random number
2015-12-08 13:15 ` Eric Dumazet
@ 2015-12-08 16:33 ` Tom Herbert
2015-12-09 11:14 ` Alexander Drozdov
0 siblings, 1 reply; 15+ messages in thread
From: Tom Herbert @ 2015-12-08 16:33 UTC (permalink / raw)
To: Eric Dumazet
Cc: Alexander Drozdov, David S. Miller,
Linux Kernel Network Developers, Kernel Team
On Tue, Dec 8, 2015 at 5:15 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2015-12-08 at 11:33 +0300, Alexander Drozdov wrote:
>> 29.07.2015 02:02, Tom Herbert wrote:
>> > This patch creates sk_set_txhash and eliminates protocol specific
>> > inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
>> > random number instead of performing flow dissection. sk_set_txash
>> > is also allowed to be called multiple times for the same socket,
>> > we'll need this when redoing the hash for negative routing advice.
>> It seems that this patch and some previous txhash-related
>> ones break af_packet hash features for outgoing packets:
>> - PACKET_FANOUT_HASH
>> - TP_FT_REQ_FILL_RXHASH
>>
>> af_packet now thinks that hashes for for incoming and outgoing
>> packets of the same TCP stream differ. That is true for TCP
>> sessions initiated by the host.
>
> There never has been such guarantee. Even rx hashes for a single TCP
> flow can differ, if packets are received on two different NIC with
> different RSSS keys.
>
+1, it is a salient property that hashes can differ in each direction
for a flow and that the hash for a flow can change over time.
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [net-next,1/2] net: Set sk_txhash from a random number
2015-12-08 16:33 ` Tom Herbert
@ 2015-12-09 11:14 ` Alexander Drozdov
0 siblings, 0 replies; 15+ messages in thread
From: Alexander Drozdov @ 2015-12-09 11:14 UTC (permalink / raw)
To: Tom Herbert, Eric Dumazet
Cc: David S. Miller, Linux Kernel Network Developers, Kernel Team
08.12.2015 19:33, Tom Herbert wrote:
> On Tue, Dec 8, 2015 at 5:15 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> There never has been such guarantee. Even rx hashes for a single TCP
>> flow can differ, if packets are received on two different NIC with
>> different RSSS keys.
>>
> +1, it is a salient property that hashes can differ in each direction
> for a flow and that the hash for a flow can change over time.
Thanks! I'll then try to move onto BPF fanout.
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2015-12-09 11:14 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-07-28 23:02 [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs Tom Herbert
2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
2015-07-29 9:13 ` Thomas Graf
2015-07-29 9:29 ` Eric Dumazet
2015-07-29 9:54 ` Thomas Graf
2015-07-29 10:06 ` Eric Dumazet
2015-07-29 10:47 ` Thomas Graf
2015-07-29 15:58 ` Tom Herbert
2015-07-29 20:02 ` Thomas Graf
2015-12-08 8:33 ` [net-next,1/2] " Alexander Drozdov
2015-12-08 13:15 ` Eric Dumazet
2015-12-08 16:33 ` Tom Herbert
2015-12-09 11:14 ` Alexander Drozdov
2015-07-28 23:02 ` [PATCH net-next 2/2] net: Recompute sk_txhash on negative routing advice Tom Herbert
2015-07-30 5:44 ` [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs David Miller
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).