* [PATCH 1/2] ipv4: Invalidate the socket cached route on pmtu events if possible
@ 2013-01-21 11:59 Steffen Klassert
2013-01-21 12:00 ` [PATCH 2/2] ipv4: Add a socket release callback for datagram sockets Steffen Klassert
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Steffen Klassert @ 2013-01-21 11:59 UTC (permalink / raw)
To: David Miller; +Cc: Yurij M. Plotnikov, Julian Anastasov, netdev
The route lookup in ipv4_sk_update_pmtu() might return a route
different from the route we cached at the socket. This is because
standart routes are per cpu, so each cpu has it's own struct rtable.
This means that we do not invalidate the socket cached route if the
NET_RX_SOFTIRQ is not served by the same cpu that the sending socket
uses. As a result, the cached route reused until we disconnect.
With this patch we invalidate the socket cached route if possible.
If the socket is owened by the user, we can't update the cached
route directly. A followup patch will implement socket release
callback functions for datagram sockets to handle this case.
Reported-by: Yurij M. Plotnikov <Yurij.Plotnikov@oktetlabs.ru>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv4/route.c | 42 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 41 insertions(+), 1 deletion(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 259cbee..132737a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -965,7 +965,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
}
EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
-void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
+static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct flowi4 fl4;
@@ -978,6 +978,46 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
ip_rt_put(rt);
}
}
+
+void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
+{
+ const struct iphdr *iph = (const struct iphdr *) skb->data;
+ struct flowi4 fl4;
+ struct rtable *rt;
+ struct dst_entry *dst;
+
+ bh_lock_sock(sk);
+ rt = (struct rtable *) __sk_dst_get(sk);
+
+ if (sock_owned_by_user(sk) || !rt) {
+ __ipv4_sk_update_pmtu(skb, sk, mtu);
+ goto out;
+ }
+
+ __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+
+ if (!__sk_dst_check(sk, 0)) {
+ rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
+ if (IS_ERR(rt))
+ goto out;
+ }
+
+ __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
+
+ dst = dst_check(&rt->dst, 0);
+ if (!dst) {
+ rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
+ if (IS_ERR(rt))
+ goto out;
+
+ dst = &rt->dst;
+ }
+
+ __sk_dst_set(sk, dst);
+
+out:
+ bh_unlock_sock(sk);
+}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
void ipv4_redirect(struct sk_buff *skb, struct net *net,
--
1.7.9.5
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/2] ipv4: Add a socket release callback for datagram sockets
2013-01-21 11:59 [PATCH 1/2] ipv4: Invalidate the socket cached route on pmtu events if possible Steffen Klassert
@ 2013-01-21 12:00 ` Steffen Klassert
2013-01-21 19:18 ` David Miller
2013-01-21 19:18 ` [PATCH 1/2] ipv4: Invalidate the socket cached route on pmtu events if possible David Miller
2013-01-21 21:18 ` Julian Anastasov
2 siblings, 1 reply; 7+ messages in thread
From: Steffen Klassert @ 2013-01-21 12:00 UTC (permalink / raw)
To: David Miller; +Cc: Yurij M. Plotnikov, Julian Anastasov, netdev
This implements a socket release callback function to check
if the socket cached route got invalid during the time
we owned the socket. The function is used from udp, raw
and ping sockets.
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
include/net/ip.h | 2 ++
net/ipv4/datagram.c | 25 +++++++++++++++++++++++++
net/ipv4/ping.c | 1 +
net/ipv4/raw.c | 1 +
net/ipv4/udp.c | 1 +
5 files changed, 30 insertions(+)
diff --git a/include/net/ip.h b/include/net/ip.h
index 0707fb9..a68f838 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -143,6 +143,8 @@ static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4)
extern int ip4_datagram_connect(struct sock *sk,
struct sockaddr *uaddr, int addr_len);
+extern void ip4_datagram_release_cb(struct sock *sk);
+
struct ip_reply_arg {
struct kvec iov[1];
int flags;
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 424fafb..b28e863 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -85,3 +85,28 @@ out:
return err;
}
EXPORT_SYMBOL(ip4_datagram_connect);
+
+void ip4_datagram_release_cb(struct sock *sk)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct ip_options_rcu *inet_opt;
+ __be32 daddr = inet->inet_daddr;
+ struct flowi4 fl4;
+ struct rtable *rt;
+
+ if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0))
+ return;
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt && inet_opt->opt.srr)
+ daddr = inet_opt->opt.faddr;
+ rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr,
+ inet->inet_saddr, inet->inet_dport,
+ inet->inet_sport, sk->sk_protocol,
+ RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
+ if (!IS_ERR(rt))
+ __sk_dst_set(sk, &rt->dst);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(ip4_datagram_release_cb);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 8f3d054..6f9c072 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -738,6 +738,7 @@ struct proto ping_prot = {
.recvmsg = ping_recvmsg,
.bind = ping_bind,
.backlog_rcv = ping_queue_rcv_skb,
+ .release_cb = ip4_datagram_release_cb,
.hash = ping_v4_hash,
.unhash = ping_v4_unhash,
.get_port = ping_v4_get_port,
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 73d1e4d..6f08991 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -894,6 +894,7 @@ struct proto raw_prot = {
.recvmsg = raw_recvmsg,
.bind = raw_bind,
.backlog_rcv = raw_rcv_skb,
+ .release_cb = ip4_datagram_release_cb,
.hash = raw_hash_sk,
.unhash = raw_unhash_sk,
.obj_size = sizeof(struct raw_sock),
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 79c8dbe..1f4d405 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1952,6 +1952,7 @@ struct proto udp_prot = {
.recvmsg = udp_recvmsg,
.sendpage = udp_sendpage,
.backlog_rcv = __udp_queue_rcv_skb,
+ .release_cb = ip4_datagram_release_cb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
.rehash = udp_v4_rehash,
--
1.7.9.5
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] ipv4: Invalidate the socket cached route on pmtu events if possible
2013-01-21 11:59 [PATCH 1/2] ipv4: Invalidate the socket cached route on pmtu events if possible Steffen Klassert
2013-01-21 12:00 ` [PATCH 2/2] ipv4: Add a socket release callback for datagram sockets Steffen Klassert
@ 2013-01-21 19:18 ` David Miller
2013-01-21 21:18 ` Julian Anastasov
2 siblings, 0 replies; 7+ messages in thread
From: David Miller @ 2013-01-21 19:18 UTC (permalink / raw)
To: steffen.klassert; +Cc: Yurij.Plotnikov, ja, netdev
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 21 Jan 2013 12:59:11 +0100
> The route lookup in ipv4_sk_update_pmtu() might return a route
> different from the route we cached at the socket. This is because
> standart routes are per cpu, so each cpu has it's own struct rtable.
> This means that we do not invalidate the socket cached route if the
> NET_RX_SOFTIRQ is not served by the same cpu that the sending socket
> uses. As a result, the cached route reused until we disconnect.
>
> With this patch we invalidate the socket cached route if possible.
> If the socket is owened by the user, we can't update the cached
> route directly. A followup patch will implement socket release
> callback functions for datagram sockets to handle this case.
>
> Reported-by: Yurij M. Plotnikov <Yurij.Plotnikov@oktetlabs.ru>
> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Applied.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/2] ipv4: Add a socket release callback for datagram sockets
2013-01-21 12:00 ` [PATCH 2/2] ipv4: Add a socket release callback for datagram sockets Steffen Klassert
@ 2013-01-21 19:18 ` David Miller
0 siblings, 0 replies; 7+ messages in thread
From: David Miller @ 2013-01-21 19:18 UTC (permalink / raw)
To: steffen.klassert; +Cc: Yurij.Plotnikov, ja, netdev
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 21 Jan 2013 13:00:03 +0100
> This implements a socket release callback function to check
> if the socket cached route got invalid during the time
> we owned the socket. The function is used from udp, raw
> and ping sockets.
>
> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Applied.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] ipv4: Invalidate the socket cached route on pmtu events if possible
2013-01-21 11:59 [PATCH 1/2] ipv4: Invalidate the socket cached route on pmtu events if possible Steffen Klassert
2013-01-21 12:00 ` [PATCH 2/2] ipv4: Add a socket release callback for datagram sockets Steffen Klassert
2013-01-21 19:18 ` [PATCH 1/2] ipv4: Invalidate the socket cached route on pmtu events if possible David Miller
@ 2013-01-21 21:18 ` Julian Anastasov
2013-01-22 8:42 ` Steffen Klassert
2 siblings, 1 reply; 7+ messages in thread
From: Julian Anastasov @ 2013-01-21 21:18 UTC (permalink / raw)
To: Steffen Klassert; +Cc: David Miller, Yurij M. Plotnikov, netdev
Hello,
On Mon, 21 Jan 2013, Steffen Klassert wrote:
> The route lookup in ipv4_sk_update_pmtu() might return a route
> different from the route we cached at the socket. This is because
> standart routes are per cpu, so each cpu has it's own struct rtable.
> This means that we do not invalidate the socket cached route if the
> NET_RX_SOFTIRQ is not served by the same cpu that the sending socket
> uses. As a result, the cached route reused until we disconnect.
>
> With this patch we invalidate the socket cached route if possible.
> If the socket is owened by the user, we can't update the cached
> route directly. A followup patch will implement socket release
> callback functions for datagram sockets to handle this case.
>
> Reported-by: Yurij M. Plotnikov <Yurij.Plotnikov@oktetlabs.ru>
> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
> ---
> net/ipv4/route.c | 42 +++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 41 insertions(+), 1 deletion(-)
>
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 259cbee..132737a 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -965,7 +965,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
> }
> EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
>
> -void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
> +static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
> {
> const struct iphdr *iph = (const struct iphdr *) skb->data;
> struct flowi4 fl4;
> @@ -978,6 +978,46 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
> ip_rt_put(rt);
> }
> }
> +
> +void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
> +{
> + const struct iphdr *iph = (const struct iphdr *) skb->data;
> + struct flowi4 fl4;
> + struct rtable *rt;
> + struct dst_entry *dst;
> +
> + bh_lock_sock(sk);
> + rt = (struct rtable *) __sk_dst_get(sk);
I just saw another problem, sorry that
I missed it the first time. Here __sk_dst_get
does not get reference...
> +
> + if (sock_owned_by_user(sk) || !rt) {
> + __ipv4_sk_update_pmtu(skb, sk, mtu);
> + goto out;
> + }
> +
> + __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
> +
> + if (!__sk_dst_check(sk, 0)) {
> + rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
> + if (IS_ERR(rt))
> + goto out;
but here rt->dst comes with reference.
> + }
May be here we can use 'else dst_hold(&rt->dst);' ?
It is needed for __sk_dst_set.
> +
> + __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
> +
> + dst = dst_check(&rt->dst, 0);
> + if (!dst) {
dst_release(&rt->dst);
> + rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
> + if (IS_ERR(rt))
> + goto out;
> +
> + dst = &rt->dst;
Remove above line...
> + }
> +
and use here __sk_dst_set(sk, &rt->dst) instead:
> + __sk_dst_set(sk, dst);
Another variant is to remember with flag 'new_rt'
that we should call __sk_dst_set, eg. when rt comes from
ip_route_output_flow. By this way we can avoid some of
the dst_hold/dst_release calls if sk_dst_cache is not
changed. IIRC, according to sk_dst_check, dst_check can
not return different dst from the ->check method.
> +
> +out:
> + bh_unlock_sock(sk);
> +}
> EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
>
> void ipv4_redirect(struct sk_buff *skb, struct net *net,
> --
> 1.7.9.5
Regards
--
Julian Anastasov <ja@ssi.bg>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] ipv4: Invalidate the socket cached route on pmtu events if possible
2013-01-21 21:18 ` Julian Anastasov
@ 2013-01-22 8:42 ` Steffen Klassert
2013-01-22 9:04 ` Julian Anastasov
0 siblings, 1 reply; 7+ messages in thread
From: Steffen Klassert @ 2013-01-22 8:42 UTC (permalink / raw)
To: Julian Anastasov; +Cc: David Miller, Yurij M. Plotnikov, netdev
On Mon, Jan 21, 2013 at 11:18:27PM +0200, Julian Anastasov wrote:
>
> Hello,
>
> On Mon, 21 Jan 2013, Steffen Klassert wrote:
>
> > +{
> > + const struct iphdr *iph = (const struct iphdr *) skb->data;
> > + struct flowi4 fl4;
> > + struct rtable *rt;
> > + struct dst_entry *dst;
> > +
> > + bh_lock_sock(sk);
> > + rt = (struct rtable *) __sk_dst_get(sk);
>
> I just saw another problem, sorry that
> I missed it the first time. Here __sk_dst_get
> does not get reference...
>
Somehow I thought that I can reuse the the refcount
from the socket, but __sk_dst_set() will release it
when we continue to use the same route.
> > +
> > + if (sock_owned_by_user(sk) || !rt) {
> > + __ipv4_sk_update_pmtu(skb, sk, mtu);
> > + goto out;
> > + }
> > +
> > + __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
> > +
> > + if (!__sk_dst_check(sk, 0)) {
> > + rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
> > + if (IS_ERR(rt))
> > + goto out;
>
> but here rt->dst comes with reference.
>
> > + }
>
> May be here we can use 'else dst_hold(&rt->dst);' ?
> It is needed for __sk_dst_set.
>
> > +
> > + __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
> > +
> > + dst = dst_check(&rt->dst, 0);
> > + if (!dst) {
>
> dst_release(&rt->dst);
>
> > + rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
> > + if (IS_ERR(rt))
> > + goto out;
> > +
> > + dst = &rt->dst;
>
> Remove above line...
>
> > + }
> > +
>
> and use here __sk_dst_set(sk, &rt->dst) instead:
>
> > + __sk_dst_set(sk, dst);
>
> Another variant is to remember with flag 'new_rt'
> that we should call __sk_dst_set, eg. when rt comes from
> ip_route_output_flow. By this way we can avoid some of
> the dst_hold/dst_release calls if sk_dst_cache is not
> changed. IIRC, according to sk_dst_check, dst_check can
> not return different dst from the ->check method.
>
With this variant, we also need to check the new_rt flag
before we call the dst_release() function you mentioned
above, right?
I think we should avoid refcounts whenever we can,
so I prefer this variant. I'll do a patch.
Thanks!
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] ipv4: Invalidate the socket cached route on pmtu events if possible
2013-01-22 8:42 ` Steffen Klassert
@ 2013-01-22 9:04 ` Julian Anastasov
0 siblings, 0 replies; 7+ messages in thread
From: Julian Anastasov @ 2013-01-22 9:04 UTC (permalink / raw)
To: Steffen Klassert; +Cc: David Miller, Yurij M. Plotnikov, netdev
Hello,
On Tue, 22 Jan 2013, Steffen Klassert wrote:
> > Another variant is to remember with flag 'new_rt'
> > that we should call __sk_dst_set, eg. when rt comes from
> > ip_route_output_flow. By this way we can avoid some of
> > the dst_hold/dst_release calls if sk_dst_cache is not
> > changed. IIRC, according to sk_dst_check, dst_check can
> > not return different dst from the ->check method.
> >
>
> With this variant, we also need to check the new_rt flag
> before we call the dst_release() function you mentioned
> above, right?
Yes, also before the dst_release we add
after the first ip_route_output_flow.
> I think we should avoid refcounts whenever we can,
> so I prefer this variant. I'll do a patch.
Yes, the flag is preferred because we should
keep sk_dst_cache until we decide to replace it with
__sk_dst_set.
Regards
--
Julian Anastasov <ja@ssi.bg>
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2013-01-22 9:00 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-01-21 11:59 [PATCH 1/2] ipv4: Invalidate the socket cached route on pmtu events if possible Steffen Klassert
2013-01-21 12:00 ` [PATCH 2/2] ipv4: Add a socket release callback for datagram sockets Steffen Klassert
2013-01-21 19:18 ` David Miller
2013-01-21 19:18 ` [PATCH 1/2] ipv4: Invalidate the socket cached route on pmtu events if possible David Miller
2013-01-21 21:18 ` Julian Anastasov
2013-01-22 8:42 ` Steffen Klassert
2013-01-22 9:04 ` Julian Anastasov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).