* [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible
@ 2016-03-24 19:25 Cong Wang
2016-03-24 19:25 ` [RFT Patch net 2/2] ipv6: add a socket release callback for datagram sockets Cong Wang
2016-03-24 20:35 ` [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible Eric Dumazet
0 siblings, 2 replies; 10+ messages in thread
From: Cong Wang @ 2016-03-24 19:25 UTC (permalink / raw)
To: netdev
Cc: weiwan, Cong Wang, Steffen Klassert, Martin KaFai Lau,
Hannes Frederic Sowa, Julian Anastasov
Similar to commit 9cb3a50c5f63, with this patch we invalidate the
socket cached route if possible. If the socket is owened by the
user, we can't update the cached route directly.
Reported-by: Wei Wang <weiwan@google.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
net/ipv6/route.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 65 insertions(+), 9 deletions(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ed44663..2c16cbc 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1346,18 +1346,20 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
}
-static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
- const struct ipv6hdr *iph, u32 mtu)
+static struct dst_entry *__ip6_rt_update_pmtu(struct dst_entry *dst,
+ const struct sock *sk,
+ const struct ipv6hdr *iph,
+ u32 mtu, bool hold)
{
struct rt6_info *rt6 = (struct rt6_info *)dst;
if (rt6->rt6i_flags & RTF_LOCAL)
- return;
+ return dst;
dst_confirm(dst);
mtu = max_t(u32, mtu, IPV6_MIN_MTU);
if (mtu >= dst_mtu(dst))
- return;
+ return dst;
if (!rt6_cache_allowed_for_pmtu(rt6)) {
rt6_do_update_pmtu(rt6, mtu);
@@ -1372,11 +1374,13 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
daddr = &sk->sk_v6_daddr;
saddr = &inet6_sk(sk)->saddr;
} else {
- return;
+ return dst;
}
nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
+ if (hold)
+ dst_hold(&nrt6->dst);
/* ip6_ins_rt(nrt6) will bump the
* rt6->rt6i_node->fn_sernum
@@ -1384,14 +1388,17 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
* invalidate the sk->sk_dst_cache.
*/
ip6_ins_rt(nrt6);
+ return &nrt6->dst;
}
}
+
+ return dst;
}
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu)
{
- __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
+ __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu, false);
}
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
@@ -1410,15 +1417,64 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
dst = ip6_route_output(net, NULL, &fl6);
if (!dst->error)
- __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
+ __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), false);
dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
- ip6_update_pmtu(skb, sock_net(sk), mtu,
- sk->sk_bound_dev_if, sk->sk_mark);
+ const struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
+ struct net *net = sock_net(sk);
+ struct dst_entry *ndst, *dst;
+ struct flowi6 fl6;
+ bool new = false;
+
+ memset(&fl6, 0, sizeof(fl6));
+
+ bh_lock_sock(sk);
+
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = sk->sk_mark ? : IP6_REPLY_MARK(net, skb->mark);
+ fl6.daddr = iph->daddr;
+ fl6.saddr = iph->saddr;
+ fl6.flowlabel = ip6_flowinfo(iph);
+
+ dst = sk_dst_get(sk);
+ if (sock_owned_by_user(sk) || !dst) {
+ ip6_update_pmtu(skb, net, mtu, fl6.flowi6_oif, fl6.flowi6_mark);
+ goto out;
+ }
+
+ if (!dst_check(dst, 0)) {
+ dst_release(dst);
+ dst = ip6_route_output(net, sk, &fl6);
+ if (dst->error)
+ goto out;
+
+ new = true;
+ }
+
+ ndst = __ip6_rt_update_pmtu(dst->path, sk, iph, ntohl(mtu), true);
+ if (!dst_check(ndst, 0)) {
+ if (ndst != dst)
+ dst_release(dst);
+
+ dst = ip6_route_output(net, sk, &fl6);
+ if (dst->error)
+ goto out;
+
+ new = true;
+ } else if (ndst != dst) {
+ new = true;
+ }
+
+ if (new)
+ ip6_dst_store(sk, dst, NULL, NULL);
+
+out:
+ bh_unlock_sock(sk);
+ dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
--
2.1.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [RFT Patch net 2/2] ipv6: add a socket release callback for datagram sockets
2016-03-24 19:25 [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible Cong Wang
@ 2016-03-24 19:25 ` Cong Wang
2016-03-24 19:40 ` Cong Wang
2016-03-24 20:35 ` [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible Eric Dumazet
1 sibling, 1 reply; 10+ messages in thread
From: Cong Wang @ 2016-03-24 19:25 UTC (permalink / raw)
To: netdev
Cc: weiwan, Cong Wang, Steffen Klassert, Martin KaFai Lau,
Hannes Frederic Sowa, Julian Anastasov
Similar to commit 8141ed9fcedb, this implements a socket
release callback function to check if an IPv6 socket cached
route got invalid during the time we owned the socket.
The function is used from udp, raw sockets.
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
include/net/ipv6.h | 1 +
net/ipv6/datagram.c | 40 ++++++++++++++++++++++++++++++++++++++++
net/ipv6/raw.c | 1 +
net/ipv6/udp.c | 1 +
4 files changed, 43 insertions(+)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index d0aeb97..890456d 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -959,6 +959,7 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len);
int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr,
int addr_len);
+void ip6_datagram_release_cb(struct sock *sk);
int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
int *addr_len);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 4281621..a743caa 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -231,6 +231,46 @@ int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr,
}
EXPORT_SYMBOL_GPL(ip6_datagram_connect_v6_only);
+void ip6_datagram_release_cb(struct sock *sk)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct in6_addr *final_p, final;
+ struct ipv6_txoptions *opt;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+ struct rtable *rt;
+
+ rcu_read_lock();
+
+ dst = __sk_dst_get(sk);
+ if (!dst || !dst->obsolete || dst->ops->check(dst, 0)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = sk->sk_protocol;
+ fl6.daddr = sk->sk_v6_daddr;
+ fl6.saddr = np->saddr;
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.fl6_dport = inet->inet_dport;
+ fl6.fl6_sport = inet->inet_sport;
+
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ final_p = fl6_update_dst(&fl6, opt, &final);
+ rcu_read_unlock();
+
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ dst = !IS_ERR(rt) ? &rt->dst : NULL;
+ sk_dst_set(sk, dst);
+
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(ip6_datagram_release_cb);
+
void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload)
{
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index fa59dd7..4319e65 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1235,6 +1235,7 @@ struct proto rawv6_prot = {
.recvmsg = rawv6_recvmsg,
.bind = rawv6_bind,
.backlog_rcv = rawv6_rcv_skb,
+ .release_cb = ip6_datagram_release_cb,
.hash = raw_hash_sk,
.unhash = raw_unhash_sk,
.obj_size = sizeof(struct raw6_sock),
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index fd25e44..0fdaf8f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1539,6 +1539,7 @@ struct proto udpv6_prot = {
.sendmsg = udpv6_sendmsg,
.recvmsg = udpv6_recvmsg,
.backlog_rcv = __udpv6_queue_rcv_skb,
+ .release_cb = ip6_datagram_release_cb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
.rehash = udp_v6_rehash,
--
2.1.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [RFT Patch net 2/2] ipv6: add a socket release callback for datagram sockets
2016-03-24 19:25 ` [RFT Patch net 2/2] ipv6: add a socket release callback for datagram sockets Cong Wang
@ 2016-03-24 19:40 ` Cong Wang
0 siblings, 0 replies; 10+ messages in thread
From: Cong Wang @ 2016-03-24 19:40 UTC (permalink / raw)
To: Linux Kernel Network Developers
Cc: Wei Wang, Cong Wang, Steffen Klassert, Martin KaFai Lau,
Hannes Frederic Sowa, Julian Anastasov
On Thu, Mar 24, 2016 at 12:25 PM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
> + dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
> + dst = !IS_ERR(rt) ? &rt->dst : NULL;
Oops, this one is clearly a copy-n-paste mistake...
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible
2016-03-24 19:25 [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible Cong Wang
2016-03-24 19:25 ` [RFT Patch net 2/2] ipv6: add a socket release callback for datagram sockets Cong Wang
@ 2016-03-24 20:35 ` Eric Dumazet
2016-03-24 21:38 ` Wei Wang
2016-03-25 0:15 ` Cong Wang
1 sibling, 2 replies; 10+ messages in thread
From: Eric Dumazet @ 2016-03-24 20:35 UTC (permalink / raw)
To: Cong Wang
Cc: netdev, weiwan, Steffen Klassert, Martin KaFai Lau,
Hannes Frederic Sowa, Julian Anastasov
On Thu, 2016-03-24 at 12:25 -0700, Cong Wang wrote:
> void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
> {
> - ip6_update_pmtu(skb, sock_net(sk), mtu,
> - sk->sk_bound_dev_if, sk->sk_mark);
> + const struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
> + struct net *net = sock_net(sk);
> + struct dst_entry *ndst, *dst;
> + struct flowi6 fl6;
> + bool new = false;
> +
> + memset(&fl6, 0, sizeof(fl6));
> +
> + bh_lock_sock(sk);
> +
This is not clear why you need to acquire socket lock.
sk_dst_cache is protected by an atomic operation.
udp for example calls ip6_dst_store() without socket being locked.
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible
2016-03-24 20:35 ` [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible Eric Dumazet
@ 2016-03-24 21:38 ` Wei Wang
2016-03-25 0:15 ` Cong Wang
1 sibling, 0 replies; 10+ messages in thread
From: Wei Wang @ 2016-03-24 21:38 UTC (permalink / raw)
To: Cong Wang
Cc: Linux Kernel Network Developers, Wei Wang, Steffen Klassert,
Martin KaFai Lau, Hannes Frederic Sowa, Julian Anastasov,
Eric Dumazet
> + if (new)
> + ip6_dst_store(sk, dst, NULL, NULL);
> +
I read through the code for ip6_dst_store(), the last 2 params are for
sk->daddr_cache and sk->saddr_cache.
Those are examined and used in ip6_sk_dst_lookup_flow(). If those 2
(mostly only check sk->daddr_cache) are valid, it will try to use
sk->sk_dst_cache and avoid ip6_route_output() call. If we put it as
NULL, it is guaranteed to not pass the validation check and do
ip6_route_output() lookup.
So we probably don't want to use NULL but something similar as used in
udpv6_sendmsg() like the following?
ip6_dst_store(sk, dst,
ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
&sk->sk_v6_daddr : NULL,
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
&np->saddr :
#endif
NULL);
Thanks.
Wei
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible
2016-03-24 20:35 ` [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible Eric Dumazet
2016-03-24 21:38 ` Wei Wang
@ 2016-03-25 0:15 ` Cong Wang
2016-03-25 1:51 ` Eric Dumazet
1 sibling, 1 reply; 10+ messages in thread
From: Cong Wang @ 2016-03-25 0:15 UTC (permalink / raw)
To: Eric Dumazet
Cc: Linux Kernel Network Developers, Wei Wang, Steffen Klassert,
Martin KaFai Lau, Hannes Frederic Sowa, Julian Anastasov
On Thu, Mar 24, 2016 at 1:35 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Thu, 2016-03-24 at 12:25 -0700, Cong Wang wrote:
>
>> void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
>> {
>> - ip6_update_pmtu(skb, sock_net(sk), mtu,
>> - sk->sk_bound_dev_if, sk->sk_mark);
>> + const struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
>> + struct net *net = sock_net(sk);
>> + struct dst_entry *ndst, *dst;
>> + struct flowi6 fl6;
>> + bool new = false;
>> +
>> + memset(&fl6, 0, sizeof(fl6));
>> +
>> + bh_lock_sock(sk);
>> +
>
>
> This is not clear why you need to acquire socket lock.
>
> sk_dst_cache is protected by an atomic operation.
>
> udp for example calls ip6_dst_store() without socket being locked.
My understanding is that bh_lock_sock() prevents concurrent
access to sock struct. Since this is in softirq context, multiple
CPU's could call this function concurrently, the whole pmtu
update needs to be done atomically.
UDP, on the other hand, doesn't do this logic, it just looks up
for dst and save it in sk_dst_cache.
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible
2016-03-25 0:15 ` Cong Wang
@ 2016-03-25 1:51 ` Eric Dumazet
2016-03-25 17:17 ` Cong Wang
0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2016-03-25 1:51 UTC (permalink / raw)
To: Cong Wang
Cc: Linux Kernel Network Developers, Wei Wang, Steffen Klassert,
Martin KaFai Lau, Hannes Frederic Sowa, Julian Anastasov
On Thu, 2016-03-24 at 17:15 -0700, Cong Wang wrote:
> My understanding is that bh_lock_sock() prevents concurrent
> access to sock struct. Since this is in softirq context, multiple
> CPU's could call this function concurrently, the whole pmtu
> update needs to be done atomically.
>
> UDP, on the other hand, doesn't do this logic, it just looks up
> for dst and save it in sk_dst_cache.
Two ICMP messages processed on two different cpus can still get two
different sockets pointing to the same dst.
I do not see how dst pmtu update could be protected by a lock on each
socket. It would require a dst lock , or some simple memory writes that
do not require special synchro.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible
2016-03-25 1:51 ` Eric Dumazet
@ 2016-03-25 17:17 ` Cong Wang
2016-03-25 18:11 ` Eric Dumazet
0 siblings, 1 reply; 10+ messages in thread
From: Cong Wang @ 2016-03-25 17:17 UTC (permalink / raw)
To: Eric Dumazet
Cc: Linux Kernel Network Developers, Wei Wang, Steffen Klassert,
Martin KaFai Lau, Hannes Frederic Sowa, Julian Anastasov
On Thu, Mar 24, 2016 at 6:51 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Thu, 2016-03-24 at 17:15 -0700, Cong Wang wrote:
>
>> My understanding is that bh_lock_sock() prevents concurrent
>> access to sock struct. Since this is in softirq context, multiple
>> CPU's could call this function concurrently, the whole pmtu
>> update needs to be done atomically.
>>
>> UDP, on the other hand, doesn't do this logic, it just looks up
>> for dst and save it in sk_dst_cache.
>
> Two ICMP messages processed on two different cpus can still get two
> different sockets pointing to the same dst.
>
Yeah, and dst is refcounted so a shared dst will not be freed by both.
> I do not see how dst pmtu update could be protected by a lock on each
> socket. It would require a dst lock , or some simple memory writes that
> do not require special synchro.
I am lost here because you removed the dst lock in ipv4_sk_update_pmtu():
commit 7f502361531e9eecb396cf99bdc9e9a59f7ebd7f
Author: Eric Dumazet <edumazet@google.com>
Date: Mon Jun 30 01:26:23 2014 -0700
ipv4: irq safe sk_dst_[re]set() and ipv4_sk_update_pmtu() fix
We have two different ways to handle changes to sk->sk_dst
First way (used by TCP) assumes socket lock is owned by caller, and use
no extra lock : __sk_dst_set() & __sk_dst_reset()
Another way (used by UDP) uses sk_dst_lock because socket lock is not
always taken. Note that sk_dst_lock is not softirq safe.
These ways are not inter changeable for a given socket type.
ipv4_sk_update_pmtu(), added in linux-3.8, added a race, as it used
the socket lock as synchronization, but users might be UDP sockets.
Instead of converting sk_dst_lock to a softirq safe version, use xchg()
as we did for sk_rx_dst in commit e47eb5dfb296b ("udp: ipv4: do not use
sk_dst_lock from softirq context")
In a follow up patch, we probably can remove sk_dst_lock, as it is
only used in IPv6.
My understand is that:
1) sock lock protects the whole update: the whole check, update, recheck,
set logic, to make sure another CPU will not do the same to the same socket
at the same time.
2) the dst itself is safe, because it is always refcounted, and we use xchg()
to change the pointer in sk_dst_cache.
Or am I still missing anything here?
Thanks.
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible
2016-03-25 17:17 ` Cong Wang
@ 2016-03-25 18:11 ` Eric Dumazet
2016-03-28 22:34 ` Cong Wang
0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2016-03-25 18:11 UTC (permalink / raw)
To: Cong Wang
Cc: Linux Kernel Network Developers, Wei Wang, Steffen Klassert,
Martin KaFai Lau, Hannes Frederic Sowa, Julian Anastasov
On Fri, 2016-03-25 at 10:17 -0700, Cong Wang wrote:
> 1) sock lock protects the whole update: the whole check, update, recheck,
> set logic, to make sure another CPU will not do the same to the same socket
> at the same time.
>
> 2) the dst itself is safe, because it is always refcounted, and we use xchg()
> to change the pointer in sk_dst_cache.
>
> Or am I still missing anything here?
As TCP always lock the socket before doing its heavy stuff,
it can use a variant of sk_dst_cache manipulations that do not use extra
atomic operations.
But UDP gets xchg() to safely exchange sk_dst_cache, because we do not
feel locking the socket is needed for UDP for typical uses (! cork)
If you hold the socket lock in ICMP handler, then it would be
inconsistent with udp sendmsg() where we do not hold the socket lock.
Since I believe udp sendmsg() is fine, I do believe you do not need to
lock the socket, and then care about socket being owned by the user.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible
2016-03-25 18:11 ` Eric Dumazet
@ 2016-03-28 22:34 ` Cong Wang
0 siblings, 0 replies; 10+ messages in thread
From: Cong Wang @ 2016-03-28 22:34 UTC (permalink / raw)
To: Eric Dumazet
Cc: Linux Kernel Network Developers, Wei Wang, Steffen Klassert,
Martin KaFai Lau, Hannes Frederic Sowa, Julian Anastasov
On Fri, Mar 25, 2016 at 11:11 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Fri, 2016-03-25 at 10:17 -0700, Cong Wang wrote:
>
>> 1) sock lock protects the whole update: the whole check, update, recheck,
>> set logic, to make sure another CPU will not do the same to the same socket
>> at the same time.
>>
>> 2) the dst itself is safe, because it is always refcounted, and we use xchg()
>> to change the pointer in sk_dst_cache.
>>
>> Or am I still missing anything here?
>
> As TCP always lock the socket before doing its heavy stuff,
> it can use a variant of sk_dst_cache manipulations that do not use extra
> atomic operations.
>
> But UDP gets xchg() to safely exchange sk_dst_cache, because we do not
> feel locking the socket is needed for UDP for typical uses (! cork)
>
> If you hold the socket lock in ICMP handler, then it would be
> inconsistent with udp sendmsg() where we do not hold the socket lock.
>
> Since I believe udp sendmsg() is fine, I do believe you do not need to
> lock the socket, and then care about socket being owned by the user.
I see, seems the whole update logic is safe to become lock-free, then
commit 8141ed9fcedb278f4a3a78680591bef1e55f75fb can be just
reverted.
OTOH, other bh_lock_sock() callers need it to update queues etc.,
here we only need to check and update one single pointer in sk.
Steffen?
Thanks.
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2016-03-28 22:34 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-03-24 19:25 [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible Cong Wang
2016-03-24 19:25 ` [RFT Patch net 2/2] ipv6: add a socket release callback for datagram sockets Cong Wang
2016-03-24 19:40 ` Cong Wang
2016-03-24 20:35 ` [RFT Patch net 1/2] ipv6: invalidate the socket cached route on pmtu events if possible Eric Dumazet
2016-03-24 21:38 ` Wei Wang
2016-03-25 0:15 ` Cong Wang
2016-03-25 1:51 ` Eric Dumazet
2016-03-25 17:17 ` Cong Wang
2016-03-25 18:11 ` Eric Dumazet
2016-03-28 22:34 ` Cong Wang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).