netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org
Subject: [PATCH 21/34] ipvs: Add generic ensure_mtu_is_adequate to handle mixed pools
Date: Mon, 29 Sep 2014 14:39:10 +0200	[thread overview]
Message-ID: <1411994363-8451-22-git-send-email-pablo@netfilter.org> (raw)
In-Reply-To: <1411994363-8451-1-git-send-email-pablo@netfilter.org>

From: Alex Gartrell <agartrell@fb.com>

The out_rt functions check to see if the mtu is large enough for the packet
and, if not, send icmp messages (TOOBIG or DEST_UNREACH) to the source and
bail out.  We needed the ability to send ICMP from the out_rt_v6 function
and DEST_UNREACH from the out_rt function, so we just pulled it out into a
common function.

Signed-off-by: Alex Gartrell <agartrell@fb.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_xmit.c |   77 ++++++++++++++++++++++++++-------------
 1 file changed, 51 insertions(+), 26 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 034a282..fa2fdd7 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -213,17 +213,57 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
 		ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
 }
 
+static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
+					  struct ip_vs_iphdr *ipvsh,
+					  struct sk_buff *skb, int mtu)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (skb_af == AF_INET6) {
+		struct net *net = dev_net(skb_dst(skb)->dev);
+
+		if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
+			if (!skb->dev)
+				skb->dev = net->loopback_dev;
+			/* only send ICMP too big on first fragment */
+			if (!ipvsh->fragoffs)
+				icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+			IP_VS_DBG(1, "frag needed for %pI6c\n",
+				  &ipv6_hdr(skb)->saddr);
+			return false;
+		}
+	} else
+#endif
+	{
+		struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
+		/* If we're going to tunnel the packet and pmtu discovery
+		 * is disabled, we'll just fragment it anyway
+		 */
+		if ((rt_mode & IP_VS_RT_MODE_TUNNEL) && !sysctl_pmtu_disc(ipvs))
+			return true;
+
+		if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) &&
+			     skb->len > mtu && !skb_is_gso(skb))) {
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+				  htonl(mtu));
+			IP_VS_DBG(1, "frag needed for %pI4\n",
+				  &ip_hdr(skb)->saddr);
+			return false;
+		}
+	}
+
+	return true;
+}
+
 /* Get route to destination or remote server */
 static int
 __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
-		   __be32 daddr, int rt_mode, __be32 *ret_saddr)
+		   __be32 daddr, int rt_mode, __be32 *ret_saddr,
+		   struct ip_vs_iphdr *ipvsh)
 {
 	struct net *net = dev_net(skb_dst(skb)->dev);
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_dest_dst *dest_dst;
 	struct rtable *rt;			/* Route to the other host */
-	struct iphdr *iph;
-	__be16 df;
 	int mtu;
 	int local, noref = 1;
 
@@ -279,7 +319,6 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
 			     " daddr=%pI4\n", &dest->addr.ip);
 		goto err_put;
 	}
-	iph = ip_hdr(skb);
 
 	if (unlikely(local)) {
 		/* skb to local stack, preserve old route */
@@ -290,7 +329,6 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
 
 	if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
 		mtu = dst_mtu(&rt->dst);
-		df = iph->frag_off & htons(IP_DF);
 	} else {
 		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 		if (mtu < 68) {
@@ -298,16 +336,10 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
 			goto err_put;
 		}
 		maybe_update_pmtu(skb_af, skb, mtu);
-		/* MTU check allowed? */
-		df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0;
 	}
 
-	/* MTU checking */
-	if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) {
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
-		IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr);
+	if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
 		goto err_put;
-	}
 
 	skb_dst_drop(skb);
 	if (noref) {
@@ -450,15 +482,8 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
 		maybe_update_pmtu(skb_af, skb, mtu);
 	}
 
-	if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
-		if (!skb->dev)
-			skb->dev = net->loopback_dev;
-		/* only send ICMP too big on first fragment */
-		if (!ipvsh->fragoffs)
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr);
+	if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
 		goto err_put;
-	}
 
 	skb_dst_drop(skb);
 	if (noref) {
@@ -565,7 +590,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	rcu_read_lock();
 	if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr,
-			       IP_VS_RT_MODE_NON_LOCAL, NULL) < 0)
+			       IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
 		goto tx_error;
 
 	ip_send_check(iph);
@@ -644,7 +669,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
 				   IP_VS_RT_MODE_LOCAL |
 				   IP_VS_RT_MODE_NON_LOCAL |
-				   IP_VS_RT_MODE_RDR, NULL);
+				   IP_VS_RT_MODE_RDR, NULL, ipvsh);
 	if (local < 0)
 		goto tx_error;
 	rt = skb_rtable(skb);
@@ -841,7 +866,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 				   IP_VS_RT_MODE_LOCAL |
 				   IP_VS_RT_MODE_NON_LOCAL |
 				   IP_VS_RT_MODE_CONNECT |
-				   IP_VS_RT_MODE_TUNNEL, &saddr);
+				   IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
 	if (local < 0)
 		goto tx_error;
 	if (local) {
@@ -1032,7 +1057,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
 				   IP_VS_RT_MODE_LOCAL |
 				   IP_VS_RT_MODE_NON_LOCAL |
-				   IP_VS_RT_MODE_KNOWN_NH, NULL);
+				   IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
 	if (local < 0)
 		goto tx_error;
 	if (local) {
@@ -1137,7 +1162,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
 	rcu_read_lock();
 	local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
-				   NULL);
+				   NULL, iph);
 	if (local < 0)
 		goto tx_error;
 	rt = skb_rtable(skb);
-- 
1.7.10.4


  parent reply	other threads:[~2014-09-29 12:38 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-29 12:38 [PATCH 00/34] pull request: netfilter/ipvs updates for net-next Pablo Neira Ayuso
2014-09-29 12:38 ` [PATCH 01/34] netfilter: fix compilation of masquerading without IP_NF_TARGET_MASQUERADE Pablo Neira Ayuso
2014-09-29 12:38 ` [PATCH 02/34] netfilter: nf_tables: add NFTA_MASQ_UNSPEC to nft_masq_attributes Pablo Neira Ayuso
2014-09-29 12:38 ` [PATCH 03/34] netfilter: NFT_CHAIN_NAT_IPV* is independent of NFT_NAT Pablo Neira Ayuso
2014-09-29 12:38 ` [PATCH 04/34] netfilter: masquerading needs to be independent of x_tables in Kconfig Pablo Neira Ayuso
2014-09-29 12:38 ` [PATCH 05/34] netfilter: ipset: Fix static checker warning in ip_set_core.c Pablo Neira Ayuso
2014-09-29 12:38 ` [PATCH 06/34] netfilter: ipset: Add skbinfo extension kernel support in the ipset core Pablo Neira Ayuso
2014-09-29 12:38 ` [PATCH 07/34] netfilter: ipset: Add skbinfo extension kernel support for the bitmap set types Pablo Neira Ayuso
2014-09-29 12:38 ` [PATCH 08/34] netfilter: ipset: Add skbinfo extension kernel support for the hash " Pablo Neira Ayuso
2014-09-29 12:38 ` [PATCH 09/34] netfilter: ipset: Add skbinfo extension kernel support for the list set type Pablo Neira Ayuso
2014-09-29 12:38 ` [PATCH 10/34] netfilter: ipset: Add skbinfo extension support to SET target Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 11/34] netfilter: ipset: send nonzero skbinfo extensions only Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 12/34] netfilter: ipset: hash:mac type added to ipset Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 13/34] ipvs: Add simple weighted failover scheduler Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 14/34] ipvs: Add destination address family to netlink interface Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 15/34] ipvs: Supply destination addr family to ip_vs_{lookup_dest,find_dest} Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 16/34] ipvs: Pass destination address family to ip_vs_trash_get_dest Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 17/34] ipvs: Supply destination address family to ip_vs_conn_new Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 18/34] ipvs: prevent mixing heterogeneous pools and synchronization Pablo Neira Ayuso
2014-09-29 16:17   ` Sergei Shtylyov
2014-09-30  2:21     ` Simon Horman
2014-09-29 12:39 ` [PATCH 19/34] ipvs: Pull out crosses_local_route_boundary logic Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 20/34] ipvs: Pull out update_pmtu code Pablo Neira Ayuso
2014-09-29 12:39 ` Pablo Neira Ayuso [this message]
2014-09-29 12:39 ` [PATCH 22/34] ipvs: support ipv4 in ipv6 and ipv6 in ipv4 tunnel forwarding Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 23/34] ipvs: address family of LBLC entry depends on svc family Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 24/34] ipvs: address family of LBLCR " Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 25/34] ipvs: use correct address family in scheduler logs Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 26/34] ipvs: use the new dest addr family field Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 27/34] ipvs: Allow heterogeneous pools now that we support them Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 28/34] netfilter: nfnetlink: use original skbuff when committing/aborting Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 29/34] netfilter: nf_tables: export rule-set generation ID Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 30/34] net/netfilter/x_tables.c: use __seq_open_private() Pablo Neira Ayuso
2014-09-29 16:07   ` Sergei Shtylyov
2014-09-29 12:39 ` [PATCH 31/34] netfilter: bridge: nf_bridge_copy_header as static inline in header Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 32/34] netfilter: bridge: move br_netfilter out of the core Pablo Neira Ayuso
2014-09-29 22:04   ` Eric Dumazet
2014-09-29 23:17     ` Florian Westphal
2014-09-30  8:56       ` Pablo Neira Ayuso
2014-09-30 15:58   ` Stephen Hemminger
2014-10-01 10:33     ` Pablo Neira Ayuso
2014-09-29 12:39 ` [PATCH 33/34] netfilter: nf_tables: store and dump set policy Pablo Neira Ayuso
2014-09-29 16:14   ` Sergei Shtylyov
2014-10-01 13:47     ` Arturo Borrero Gonzalez
2014-09-29 12:39 ` [PATCH 34/34] netfilter: conntrack: disable generic tracking for known protocols Pablo Neira Ayuso
2014-09-29 18:54 ` [PATCH 00/34] pull request: netfilter/ipvs updates for net-next David Miller
2014-09-30  0:22   ` Simon Horman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1411994363-8451-22-git-send-email-pablo@netfilter.org \
    --to=pablo@netfilter.org \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).