netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: David Ahern <dsa@cumulusnetworks.com>
To: netdev@vger.kernel.org
Cc: tom@herbertland.com, David Ahern <dsa@cumulusnetworks.com>
Subject: [PATCH 2/2 v2] net: Remove VRF change to udp_sendmsg
Date: Wed,  9 Sep 2015 15:57:42 -0600	[thread overview]
Message-ID: <1441835862-41403-2-git-send-email-dsa@cumulusnetworks.com> (raw)
In-Reply-To: <1441835862-41403-1-git-send-email-dsa@cumulusnetworks.com>

Remove the VRF change in udp_sendmsg to set the source address. The VRF
driver already has access to the packet on the TX path via the dst. It
can be used to update the source address in the header. Since the VRF
device is directly associated with a table, use fib_table_lookup rather
than the ip_route_output lookup functions.

The function to update the source address is based on similar code in OVS.

Cc: Tom Herbert <tom@herbertland.com>
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
v2
- use fib_table_lookup over __ip_route_output_key since VRF device
  is associated with a table

Dave: not sure if you wanted this for net or wait until net-next.

 drivers/net/vrf.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 net/ipv4/udp.c    | 18 --------------
 2 files changed, 66 insertions(+), 23 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index e7094fbd7568..4ae0295d4c63 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -160,6 +160,65 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
 	return NET_XMIT_DROP;
 }
 
+/* Set IPv4 source address and fix up the TCP/UDP and IP header checksums */
+static void update_ipv4_saddr(struct sk_buff *skb, struct iphdr *iph,
+			      __be32 new_addr)
+{
+	int tlen = skb->len - skb_transport_offset(skb);
+
+	if (iph->protocol == IPPROTO_TCP) {
+		if (likely(tlen >= sizeof(struct tcphdr)))
+			inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
+						 iph->saddr, new_addr, 1);
+	} else if (iph->protocol == IPPROTO_UDP) {
+		if (likely(tlen >= sizeof(struct udphdr))) {
+			struct udphdr *uh = udp_hdr(skb);
+
+			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+				inet_proto_csum_replace4(&uh->check, skb,
+							 iph->saddr, new_addr,
+							 1);
+				if (!uh->check)
+					uh->check = CSUM_MANGLED_0;
+			}
+		}
+	}
+
+	csum_replace4(&iph->check, iph->saddr, new_addr);
+	skb_clear_hash(skb);
+	iph->saddr = new_addr;
+}
+
+static int vrf_set_ip_saddr(struct sk_buff *skb, struct net_device *dev)
+{
+	struct iphdr *ip4h = ip_hdr(skb);
+	struct flowi4 fl4 = {
+		.flowi4_oif = dev->ifindex,
+		.flowi4_iif = LOOPBACK_IFINDEX,
+		.flowi4_tos = RT_TOS(ip4h->tos),
+		.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_VRFSRC,
+		.daddr = ip4h->daddr,
+	};
+	struct net_vrf *vrf = netdev_priv(dev);
+	struct net *net = dev_net(dev);
+	struct fib_result res;
+	struct fib_table *tb;
+
+	res.tclassid = 0;
+
+	rcu_read_lock(); /* look up a source address in the VRF's FIB table */
+
+	tb = fib_get_table(net, vrf->tb_id);
+	if (tb && !fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF)) {
+		fib_select_path(net, &res, &fl4);
+		update_ipv4_saddr(skb, ip4h, fl4.saddr);
+	}
+
+	rcu_read_unlock();
+
+	return 0; /* NOTE(review): never fails; caller's error path is dead */
+}
+
 static int vrf_send_v4_prep(struct sk_buff *skb, struct flowi4 *fl4,
 			    struct net_device *vrf_dev)
 {
@@ -195,16 +254,12 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
 		.flowi4_tos = RT_TOS(ip4h->tos),
 		.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_VRFSRC,
 		.daddr = ip4h->daddr,
+		.saddr = ip4h->saddr,
 	};
 
 	if (vrf_send_v4_prep(skb, &fl4, vrf_dev))
 		goto err;
 
-	if (!ip4h->saddr) {
-		ip4h->saddr = inet_select_addr(skb_dst(skb)->dev, 0,
-					       RT_SCOPE_LINK);
-	}
-
 	ret = ip_local_out(skb);
 	if (unlikely(net_xmit_eval(ret)))
 		vrf_dev->stats.tx_errors++;
@@ -298,12 +353,18 @@ static int vrf_finish_output(struct sock *sk, struct sk_buff *skb)
 static int vrf_output(struct sock *sk, struct sk_buff *skb)
 {
 	struct net_device *dev = skb_dst(skb)->dev;
+	struct iphdr *iph = ip_hdr(skb);
 
 	IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
 
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_IP);
 
+	if (!iph->saddr && vrf_set_ip_saddr(skb, dev)) {
+		vrf_tx_error(dev, skb);
+		return -EINVAL;
+	}
+
 	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb,
 			    NULL, dev,
 			    vrf_finish_output,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c0a15e7f359f..ee3ba30f1ca5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1017,24 +1017,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 		fl4 = &fl4_stack;
 
-		/* unconnected socket. If output device is enslaved to a VRF
-		 * device lookup source address from VRF table. This mimics
-		 * behavior of ip_route_connect{_init}.
-		 */
-		if (netif_index_is_vrf(net, ipc.oif)) {
-			flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
-					   RT_SCOPE_UNIVERSE, sk->sk_protocol,
-					   (flow_flags | FLOWI_FLAG_VRFSRC),
-					   faddr, saddr, dport,
-					   inet->inet_sport);
-
-			rt = ip_route_output_flow(net, fl4, sk);
-			if (!IS_ERR(rt)) {
-				saddr = fl4->saddr;
-				ip_rt_put(rt);
-			}
-		}
-
 		flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
 				   RT_SCOPE_UNIVERSE, sk->sk_protocol,
 				   flow_flags,
-- 
2.3.2 (Apple Git-55)

  reply	other threads:[~2015-09-09 21:57 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-09 21:57 [PATCH net-next 1/2] net: Refactor path selection in __ip_route_output_key David Ahern
2015-09-09 21:57 ` David Ahern [this message]
2015-09-10  0:04   ` [PATCH 2/2 v2] net: Remove VRF change to udp_sendmsg Tom Herbert
2015-09-10  0:23     ` David Ahern
2015-09-10  0:51       ` Tom Herbert
2015-09-10  1:10         ` David Ahern
2015-09-10  3:20         ` David Miller
2015-09-10  3:32           ` David Ahern
2015-09-09 22:56 ` [PATCH net-next 1/2] net: Refactor path selection in __ip_route_output_key David Ahern
2015-09-10  0:00   ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1441835862-41403-2-git-send-email-dsa@cumulusnetworks.com \
    --to=dsa@cumulusnetworks.com \
    --cc=netdev@vger.kernel.org \
    --cc=tom@herbertland.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).