From: Alex Gartrell <agartrell@fb.com>
To: horms@verge.net.au
Cc: ja@ssi.bg, lvs-devel@vger.kernel.org, kernel-team@fb.com,
Alex Gartrell <agartrell@fb.com>
Subject: [PATCH ipvs,v4 08/20] ipvs: Add generic ensure_mtu_is_adequate to handle mixed pools
Date: Fri, 29 Aug 2014 01:38:57 -0700 [thread overview]
Message-ID: <1409301549-5284-9-git-send-email-agartrell@fb.com> (raw)
In-Reply-To: <1409301549-5284-1-git-send-email-agartrell@fb.com>
The out_rt functions check whether the MTU is large enough for the packet
and, if not, send ICMP messages (TOOBIG or DEST_UNREACH) to the source and
bail out. With mixed pools, we need the ability to send ICMPv6 TOOBIG from
the out_rt function and ICMP DEST_UNREACH from the out_rt_v6 function, so
we pulled the check out into a common function.
Signed-off-by: Alex Gartrell <agartrell@fb.com>
---
net/netfilter/ipvs/ip_vs_xmit.c | 77 +++++++++++++++++++++++++++--------------
1 file changed, 51 insertions(+), 26 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 034a282..fa2fdd7 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -213,17 +213,57 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
}
+static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
+ struct ip_vs_iphdr *ipvsh,
+ struct sk_buff *skb, int mtu)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ struct net *net = dev_net(skb_dst(skb)->dev);
+
+ if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
+ if (!skb->dev)
+ skb->dev = net->loopback_dev;
+ /* only send ICMP too big on first fragment */
+ if (!ipvsh->fragoffs)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ IP_VS_DBG(1, "frag needed for %pI6c\n",
+ &ipv6_hdr(skb)->saddr);
+ return false;
+ }
+ } else
+#endif
+ {
+ struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
+ /* If we're going to tunnel the packet and pmtu discovery
+ * is disabled, we'll just fragment it anyway
+ */
+ if ((rt_mode & IP_VS_RT_MODE_TUNNEL) && !sysctl_pmtu_disc(ipvs))
+ return true;
+
+ if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) &&
+ skb->len > mtu && !skb_is_gso(skb))) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ IP_VS_DBG(1, "frag needed for %pI4\n",
+ &ip_hdr(skb)->saddr);
+ return false;
+ }
+ }
+
+ return true;
+}
+
/* Get route to destination or remote server */
static int
__ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
- __be32 daddr, int rt_mode, __be32 *ret_saddr)
+ __be32 daddr, int rt_mode, __be32 *ret_saddr,
+ struct ip_vs_iphdr *ipvsh)
{
struct net *net = dev_net(skb_dst(skb)->dev);
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_dest_dst *dest_dst;
struct rtable *rt; /* Route to the other host */
- struct iphdr *iph;
- __be16 df;
int mtu;
int local, noref = 1;
@@ -279,7 +319,6 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
" daddr=%pI4\n", &dest->addr.ip);
goto err_put;
}
- iph = ip_hdr(skb);
if (unlikely(local)) {
/* skb to local stack, preserve old route */
@@ -290,7 +329,6 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
mtu = dst_mtu(&rt->dst);
- df = iph->frag_off & htons(IP_DF);
} else {
mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
if (mtu < 68) {
@@ -298,16 +336,10 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
goto err_put;
}
maybe_update_pmtu(skb_af, skb, mtu);
- /* MTU check allowed? */
- df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0;
}
- /* MTU checking */
- if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr);
+ if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
- }
skb_dst_drop(skb);
if (noref) {
@@ -450,15 +482,8 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
maybe_update_pmtu(skb_af, skb, mtu);
}
- if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
- if (!skb->dev)
- skb->dev = net->loopback_dev;
- /* only send ICMP too big on first fragment */
- if (!ipvsh->fragoffs)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr);
+ if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
- }
skb_dst_drop(skb);
if (noref) {
@@ -565,7 +590,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
rcu_read_lock();
if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr,
- IP_VS_RT_MODE_NON_LOCAL, NULL) < 0)
+ IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
goto tx_error;
ip_send_check(iph);
@@ -644,7 +669,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_RDR, NULL);
+ IP_VS_RT_MODE_RDR, NULL, ipvsh);
if (local < 0)
goto tx_error;
rt = skb_rtable(skb);
@@ -841,7 +866,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_CONNECT |
- IP_VS_RT_MODE_TUNNEL, &saddr);
+ IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
if (local < 0)
goto tx_error;
if (local) {
@@ -1032,7 +1057,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_KNOWN_NH, NULL);
+ IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
if (local < 0)
goto tx_error;
if (local) {
@@ -1137,7 +1162,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
rcu_read_lock();
local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
- NULL);
+ NULL, iph);
if (local < 0)
goto tx_error;
rt = skb_rtable(skb);
--
1.8.1
next prev parent reply other threads:[~2014-08-29 8:38 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-08-29 8:38 [PATCH ipvs,v4 00/20] Support v6 real servers in v4 pools and vice versa Alex Gartrell
2014-08-29 8:38 ` [PATCH ipvs,v4 01/20] ipvs: Add destination address family to netlink interface Alex Gartrell
2014-08-29 8:38 ` [PATCH ipvs,v4 02/20] ipvs: Supply destination addr family to ip_vs_{lookup_dest,find_dest} Alex Gartrell
2014-08-29 8:38 ` [PATCH ipvs,v4 03/20] ipvs: Pass destination address family to ip_vs_trash_get_dest Alex Gartrell
2014-08-29 8:38 ` [PATCH ipvs,v4 04/20] ipvs: Supply destination address family to ip_vs_conn_new Alex Gartrell
2014-08-29 8:38 ` [PATCH ipvs,v4 05/20] ipvs: prevent mixing heterogeneous pools and synchronization Alex Gartrell
2014-08-29 8:38 ` [PATCH ipvs,v4 06/20] ipvs: Pull out crosses_local_route_boundary logic Alex Gartrell
2014-08-29 8:38 ` [PATCH ipvs,v4 07/20] ipvs: Pull out update_pmtu code Alex Gartrell
2014-08-29 8:38 ` Alex Gartrell [this message]
2014-08-29 8:38 ` [PATCH ipvs,v4 09/20] ipvs: support ipv4 in ipv6 and ipv6 in ipv4 tunnel forwarding Alex Gartrell
2014-08-29 8:38 ` [PATCH ipvs,v4 10/20] ipvs: address family of LBLC entry depends on svc family Alex Gartrell
2014-08-29 8:39 ` [PATCH ipvs,v4 11/20] ipvs: address family of LBLCR " Alex Gartrell
2014-08-29 8:39 ` [PATCH ipvs,v4 12/20] ipvs: use correct address family in DH logs Alex Gartrell
2014-08-29 8:39 ` [PATCH ipvs,v4 13/20] ipvs: use correct address family in LC logs Alex Gartrell
2014-08-29 8:39 ` [PATCH ipvs,v4 14/20] ipvs: use correct address family in NQ logs Alex Gartrell
2014-08-29 8:39 ` [PATCH ipvs,v4 15/20] ipvs: use correct address family in RR logs Alex Gartrell
2014-08-29 8:39 ` [PATCH ipvs,v4 16/20] ipvs: use correct address family in SED logs Alex Gartrell
2014-08-29 8:39 ` [PATCH ipvs,v4 17/20] ipvs: use correct address family in SH logs Alex Gartrell
2014-08-29 8:39 ` [PATCH ipvs,v4 18/20] ipvs: use correct address family in WLC logs Alex Gartrell
2014-08-29 8:39 ` [PATCH ipvs,v4 19/20] ipvs: use the new dest addr family field Alex Gartrell
2014-08-29 10:32 ` Julian Anastasov
2014-08-29 21:19 ` Alex Gartrell
2014-08-30 8:35 ` Julian Anastasov
2014-09-01 1:17 ` Simon Horman
2014-09-09 19:41 ` Julian Anastasov
2014-09-09 23:22 ` Alex Gartrell
2014-09-10 6:05 ` Julian Anastasov
2014-08-29 8:39 ` [PATCH ipvs,v4 20/20] ipvs: Allow heterogeneous pools now that we support them Alex Gartrell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1409301549-5284-9-git-send-email-agartrell@fb.com \
--to=agartrell@fb.com \
--cc=horms@verge.net.au \
--cc=ja@ssi.bg \
--cc=kernel-team@fb.com \
--cc=lvs-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.