All of lore.kernel.org
 help / color / mirror / Atom feed
* [NETFILTER]: xt_TCPMSS: Consider reverse route's MTU in clamp-to-pmtu
@ 2008-01-23 20:57 Jan Engelhardt
  2008-01-23 22:29 ` Patrick McHardy
  0 siblings, 1 reply; 10+ messages in thread
From: Jan Engelhardt @ 2008-01-23 20:57 UTC (permalink / raw)
  To: kaber; +Cc: Netfilter Developer Mailing List, jaco


On top of standard net branch, no previous patch required.

===
commit 46bfc78a6b91251fd55140b27f382259fc3333a2
Author: Jan Engelhardt <jengelh@computergmbh.de>
Date:   Wed Jan 23 16:35:00 2008 +0100

    [NETFILTER]: xt_TCPMSS: Consider reverse route's MTU in clamp-to-pmtu
    
    The TCPMSS target in Xtables should consider the MTU of the reverse
    route on forwarded packets as part of the path MTU.
    
    Case in point: IN=ppp0, OUT=eth0. MSS set to 1460 in spite of MTU of
    ppp0 being 1392.
    
    Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>

diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 60e3767..d2b3a27 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -13,7 +13,10 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/tcp.h>
+#include <net/dst.h>
+#include <net/flow.h>
 #include <net/ipv6.h>
+#include <net/route.h>
 #include <net/tcp.h>
 
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -41,6 +44,7 @@ optlen(const u_int8_t *opt, unsigned int offset)
 static int
 tcpmss_mangle_packet(struct sk_buff *skb,
 		     const struct xt_tcpmss_info *info,
+		     unsigned int in_mtu,
 		     unsigned int tcphoff,
 		     unsigned int minlen)
 {
@@ -76,7 +80,13 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 				       dst_mtu(skb->dst));
 			return -1;
 		}
-		newmss = dst_mtu(skb->dst) - minlen;
+		if (in_mtu <= minlen) {
+			if (net_ratelimit())
+				printk(KERN_ERR "xt_TCPMSS: unknown or "
+				       "invalid path-MTU (%u)\n", in_mtu);
+			return -1;
+		}
+		newmss = min(dst_mtu(skb->dst), in_mtu) - minlen;
 	} else
 		newmss = info->mss;
 
@@ -137,6 +147,30 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 	return TCPOLEN_MSS;
 }
 
+static u_int32_t tcpmss_reverse_mtu4(const struct iphdr *iph)
+{
+	struct flowi fl = {.nl_u = {.ip4_u = {
+		.daddr = iph->saddr,
+		.tos   = RT_TOS(iph->tos),
+		.scope = RT_SCOPE_UNIVERSE,
+	}}};
+	const struct nf_afinfo *ai;
+	struct rtable *rt = NULL;
+	u_int32_t mtu     = ~0U;
+
+	rcu_read_lock();
+	ai = nf_get_afinfo(AF_INET);
+	if (ai != NULL)
+		ai->route((struct dst_entry **)&rt, &fl);
+	rcu_read_unlock();
+
+	if (rt != NULL) {
+		mtu = dst_mtu(&rt->u.dst);
+		dst_release(&rt->u.dst);
+	}
+	return mtu;
+}
+
 static unsigned int
 tcpmss_tg4(struct sk_buff *skb, const struct net_device *in,
            const struct net_device *out, unsigned int hooknum,
@@ -146,8 +180,8 @@ tcpmss_tg4(struct sk_buff *skb, const struct net_device *in,
 	__be16 newlen;
 	int ret;
 
-	ret = tcpmss_mangle_packet(skb, targinfo, iph->ihl * 4,
-				   sizeof(*iph) + sizeof(struct tcphdr));
+	ret = tcpmss_mangle_packet(skb, targinfo, tcpmss_reverse_mtu4(iph),
+	      iph->ihl * 4, sizeof(*iph) + sizeof(struct tcphdr));
 	if (ret < 0)
 		return NF_DROP;
 	if (ret > 0) {
@@ -160,6 +194,31 @@ tcpmss_tg4(struct sk_buff *skb, const struct net_device *in,
 }
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+static u_int32_t tcpmss_reverse_mtu6(const struct ipv6hdr *iph)
+{
+	struct flowi fl = {.nl_u = {.ip6_u = {
+		.daddr     = iph->saddr,
+		.flowlabel = ((iph->flow_lbl[0] << 16) |
+		             (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]) &
+		             0x00FFFFFF,
+	}}};
+	const struct nf_afinfo *ai;
+	struct rtable *rt = NULL;
+	u_int32_t mtu     = ~0U;
+
+	rcu_read_lock();
+	ai = nf_get_afinfo(AF_INET6);
+	if (ai != NULL)
+		ai->route((struct dst_entry **)&rt, &fl);
+	rcu_read_unlock();
+
+	if (rt != NULL) {
+		mtu = dst_mtu(&rt->u.dst);
+		dst_release(&rt->u.dst);
+	}
+	return mtu;
+}
+
 static unsigned int
 tcpmss_tg6(struct sk_buff *skb, const struct net_device *in,
            const struct net_device *out, unsigned int hooknum,
@@ -174,8 +233,8 @@ tcpmss_tg6(struct sk_buff *skb, const struct net_device *in,
 	tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
 	if (tcphoff < 0)
 		return NF_DROP;
-	ret = tcpmss_mangle_packet(skb, targinfo, tcphoff,
-				   sizeof(*ipv6h) + sizeof(struct tcphdr));
+	ret = tcpmss_mangle_packet(skb, targinfo, tcpmss_reverse_mtu6(ipv6h),
+	      tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr));
 	if (ret < 0)
 		return NF_DROP;
 	if (ret > 0) {


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [NETFILTER]: xt_TCPMSS: Consider reverse route's MTU in clamp-to-pmtu
  2008-01-23 20:57 [NETFILTER]: xt_TCPMSS: Consider reverse route's MTU in clamp-to-pmtu Jan Engelhardt
@ 2008-01-23 22:29 ` Patrick McHardy
  2008-01-23 22:43   ` Jan Engelhardt
  0 siblings, 1 reply; 10+ messages in thread
From: Patrick McHardy @ 2008-01-23 22:29 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: Netfilter Developer Mailing List, jaco

Jan Engelhardt wrote:
> On top of standard net branch, no previous patch required.
>
> ===
> commit 46bfc78a6b91251fd55140b27f382259fc3333a2
> Author: Jan Engelhardt <jengelh@computergmbh.de>
> Date:   Wed Jan 23 16:35:00 2008 +0100
>
>     [NETFILTER]: xt_TCPMSS: Consider reverse route's MTU in clamp-to-pmtu
>     
>     The TCPMSS target in Xtables should consider the MTU of the reverse
>     route on forwarded packets as part of the path MTU.
>     
>     Case in point: IN=ppp0, OUT=eth0. MSS set to 1460 in spite of MTU of
>     ppp0 being 1392.
>     
>     Signed-off-by: Jan Engelhardt <jengelh@computergmbh.de>
>   

Applied, thanks.
> +static u_int32_t tcpmss_reverse_mtu4(const struct iphdr *iph)
> +{
> +	struct flowi fl = {.nl_u = {.ip4_u = {
> +		.daddr = iph->saddr,
> +		.tos   = RT_TOS(iph->tos),
>   

This doesn't make much sense, we don't know the ToS value
that will be used in the reverse direction. People might also
use routing rules based on source address, iif etc., so I
think we should make this optional.

I'm on my way out the door, I'll take care of that tomorrow.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [NETFILTER]: xt_TCPMSS: Consider reverse route's MTU in clamp-to-pmtu
  2008-01-23 22:29 ` Patrick McHardy
@ 2008-01-23 22:43   ` Jan Engelhardt
  2008-01-24  3:47     ` Jaco Kroon
  0 siblings, 1 reply; 10+ messages in thread
From: Jan Engelhardt @ 2008-01-23 22:43 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Netfilter Developer Mailing List, jaco


On Jan 23 2008 23:29, Patrick McHardy wrote:
>> +static u_int32_t tcpmss_reverse_mtu4(const struct iphdr *iph)
>> +{
>> +	struct flowi fl = {.nl_u = {.ip4_u = {
>> +		.daddr = iph->saddr,
>> +		.tos   = RT_TOS(iph->tos),
>>   
>
> This doesn't make much sense, we don't know the ToS value
> that will be used in the reverse direction.

Usually it will be the same TOS with default setups.
Interactive SSH sessions for example set IPTOS_LOWDELAY,
on both sides.

> use routing rules based on source address, iif etc., so I
> think we should make this optional.

iif yes; should be a matter of in->ifindex or so.


thanks,
Jan


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [NETFILTER]: xt_TCPMSS: Consider reverse route's MTU in clamp-to-pmtu
  2008-01-23 22:43   ` Jan Engelhardt
@ 2008-01-24  3:47     ` Jaco Kroon
  2008-01-24  7:53       ` Patrick McHardy
  0 siblings, 1 reply; 10+ messages in thread
From: Jaco Kroon @ 2008-01-24  3:47 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: Patrick McHardy, Netfilter Developer Mailing List

Jan Engelhardt wrote:
> On Jan 23 2008 23:29, Patrick McHardy wrote:
>   
>>> +static u_int32_t tcpmss_reverse_mtu4(const struct iphdr *iph)
>>> +{
>>> +	struct flowi fl = {.nl_u = {.ip4_u = {
>>> +		.daddr = iph->saddr,
>>> +		.tos   = RT_TOS(iph->tos),
>>>   
>>>       
>> This doesn't make much sense, we don't know the ToS value
>> that will be used in the reverse direction.
>>     
>
> Usually it will be the same TOS with default setups.
> Interactive SSH sessions for example set IPTOS_LOWDELAY,
> on both sides.
>   
_Usually_.  I've seen cases (especially with VoIP) where TOS in one
direction is 0x10 and then 0x68 in the other.  However, this is on top
of udp, for which this patch has no effect.
>   
>> use routing rules based on source address, iif etc., so I
>> think we should make this optional.
>>     
>
> iif yes; should be a matter of in->ifindex or so.
>   
I'd reckon that's a definite "yes, we should fill in iif and saddr"! 
This should cover 99.99% of cases where this is useful, and the only
potentially problematic case I can envision is with asymmetric routing
between the gateway this is running on and the final destination.  And
chances are that even in those cases the oif of two different incoming
routes are going to be the same.

In asymmetric routing you'd probably be using --set anyway though.  Is
there perhaps a custom clamp value available, eg --clamp-to-mtu 1200? 
This could be useful for those cases, as well as what we know as
"business dsl" configurations where the setup looks something like:

[ inet ]-----dsl----[bdsl-router]--ether--[LAN-gateway]

In these cases the bdsl router is often closed off to the local
administrator and run by the ISP, and I for one have been having fights
with at least one of these providers who just can't get the MSS field
clamped correctly.  So far the only way for me to get that right was to
drop my mtu on the router side to that of the lowest mtu value that I
can find with snmp on the router.  Often as low as, or close to, 1200.

Regards,
Jaco

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [NETFILTER]: xt_TCPMSS: Consider reverse route's MTU in clamp-to-pmtu
  2008-01-24  3:47     ` Jaco Kroon
@ 2008-01-24  7:53       ` Patrick McHardy
  2008-01-24  8:57         ` Jaco Kroon
  0 siblings, 1 reply; 10+ messages in thread
From: Patrick McHardy @ 2008-01-24  7:53 UTC (permalink / raw)
  To: Jaco Kroon; +Cc: Jan Engelhardt, Netfilter Developer Mailing List

Jaco Kroon wrote:
> Jan Engelhardt wrote:
>> On Jan 23 2008 23:29, Patrick McHardy wrote:
>>   
>>>> +static u_int32_t tcpmss_reverse_mtu4(const struct iphdr *iph)
>>>> +{
>>>> +	struct flowi fl = {.nl_u = {.ip4_u = {
>>>> +		.daddr = iph->saddr,
>>>> +		.tos   = RT_TOS(iph->tos),
>>>>   
>>>>       
>>> This doesn't make much sense, we don't know the ToS value
>>> that will be used in the reverse direction.
>>>     
>> Usually it will be the same TOS with default setups.
>> Interactive SSH sessions for example set IPTOS_LOWDELAY,
>> on both sides.
>>   
> _Usually_.  I've seen cases (especially with VoIP) where TOS in one
> direction is 0x10 and then 0x68 in the other.  However, this is on top
> of udp, for which this patch has no effect.
>>   
>>> use routing rules based on source address, iif etc., so I
>>> think we should make this optional.
>>>     
>> iif yes; should be a matter of in->ifindex or so.
>>   
> I'd reckon that's a definite "yes, we should fill in iif and saddr"! 


saddr and iif don't work without more complicated changes since
we'd have to use input routing.

> This should cover 99.99% of cases where this is useful, and the only
> potentially problematic case I can envision is with asymmetric routing
> between the gateway this is running on and the final destination.  And
> chances are that even in those cases the oif of two different incoming
> routes are going to be the same.


The problem is that we can't determine all keys used in the
reverse direction, which becomes obvious if you think of
mark based routing. So I'm wondering how many setups this
would break. Leaving the routing as it is and making it
optional looks safer, with the downside that most users
probably want this and won't notice the new option.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [NETFILTER]: xt_TCPMSS: Consider reverse route's MTU in clamp-to-pmtu
  2008-01-24  7:53       ` Patrick McHardy
@ 2008-01-24  8:57         ` Jaco Kroon
  2008-01-24  9:58           ` Pascal Hambourg
  0 siblings, 1 reply; 10+ messages in thread
From: Jaco Kroon @ 2008-01-24  8:57 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Jan Engelhardt, Netfilter Developer Mailing List

Patrick McHardy wrote:
> Jaco Kroon wrote:
>> Jan Engelhardt wrote:

[ -- snip -- ]

>>>> use routing rules based on source address, iif etc., so I
>>>> think we should make this optional.
>>>>     
>>> iif yes; should be a matter of in->ifindex or so.
>>>   
>> I'd reckon that's a definite "yes, we should fill in iif and saddr"! 
> 
> 
> saddr and iif don't work without more complicated changes since
> we'd have to use input routing.

Oh well.  For my needs the original simpler patch to simply look at the 
mtu of the in net_device is good enough.  So as far as I'm concerned 
this is "nice to have for the day I need to do IPSec with some 
dysfunctional router" or "some really funky asymmetric routing".

>> This should cover 99.99% of cases where this is useful, and the only
>> potentially problematic case I can envision is with asymmetric routing
>> between the gateway this is running on and the final destination.  And
>> chances are that even in those cases the oif of two different incoming
>> routes are going to be the same.
> 
> The problem is that we can't determine all keys used in the
> reverse direction, which becomes obvious if you think of
> mark based routing. So I'm wondering how many setups this
> would break. Leaving the routing as it is and making it
> optional looks safer, with the downside that most users
> probably want this and won't notice the new option.

Or keep to the original "simply look at the mtu of the in net_device as 
provided".  Which is a major improvement and should cover the vast 
majority of cases.  Just make it clear in the man page exactly what 
--clamp-mss-to-pmtu does do, and possibly add a "--clamp-to-mtu 
mtu_value" or "--clamp-to-mss mss_value" option (I'd prefer 
--clamp-to-mtu), which works like --set, but only if the new mss value 
is less than the existing one.  I prefer the mtu version simply because 
it doesn't require me to know how much space needs to be reserved for 
ip/tcp headers.

It's possible to do the path routing stuff with somewhat complicated 
iptables chains, as an example, originally I thought to do this for my 
case (the one described in the need case):

iptables -A FORWARD -i ppp+ -p tcp --tcp-flags SYN,FIN,RST SYN -j CLAMP_MSS
iptables -A FORWARD -p tcp --tcp-flags SYN,FIN,RST SYN -j TCPMSS 
--clamp-mss-to-pmtu

iptables -A CLAMP_MSS -i ppp0 -p tcp --tcp-flags SYN,FIN,RST SYN -j 
TCPMSS --set-mss $(( $mtu_of_ppp0 - 40 ))
iptables -A CLAMP_MSS -i ppp1 -p tcp --tcp-flags SYN,FIN,RST SYN -j 
TCPMSS --set-mss $(( $mtu_of_ppp1 - 40 ))
etc ...

More complex situations can also do this, and people actually setting up 
those kind of setups should be (made) aware of this.

*sigh* if there just weren't so many flippen firewalls blocking icmp 
fragmentation required packets, and if these packets were handled 
correctly ...

Jaco

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [NETFILTER]: xt_TCPMSS: Consider reverse route's MTU in clamp-to-pmtu
  2008-01-24  8:57         ` Jaco Kroon
@ 2008-01-24  9:58           ` Pascal Hambourg
  2008-01-24 10:49             ` Jaco Kroon
  2008-01-29 12:55             ` Patrick McHardy
  0 siblings, 2 replies; 10+ messages in thread
From: Pascal Hambourg @ 2008-01-24  9:58 UTC (permalink / raw)
  To: Jaco Kroon
  Cc: Patrick McHardy, Jan Engelhardt, Netfilter Developer Mailing List

Hello,

Jaco Kroon a écrit :
> 
> possibly add a "--clamp-to-mtu 
> mtu_value" or "--clamp-to-mss mss_value" option (I'd prefer 
> --clamp-to-mtu), which works like --set, but only if the new mss value 
> is less than the existing one.

Doesn't the patch "xt_TCPMSS: don't allow netfilter --setmss to increase 
mss" applied to 2.6.25 about a month ago already do this ?
-
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [NETFILTER]: xt_TCPMSS: Consider reverse route's MTU in clamp-to-pmtu
  2008-01-24  9:58           ` Pascal Hambourg
@ 2008-01-24 10:49             ` Jaco Kroon
  2008-01-24 11:20               ` Jan Engelhardt
  2008-01-29 12:55             ` Patrick McHardy
  1 sibling, 1 reply; 10+ messages in thread
From: Jaco Kroon @ 2008-01-24 10:49 UTC (permalink / raw)
  To: Pascal Hambourg
  Cc: Patrick McHardy, Jan Engelhardt, Netfilter Developer Mailing List

Pascal Hambourg wrote:
> Hello,
> 
> Jaco Kroon a écrit :
>>
>> possibly add a "--clamp-to-mtu mtu_value" or "--clamp-to-mss 
>> mss_value" option (I'd prefer --clamp-to-mtu), which works like --set, 
>> but only if the new mss value is less than the existing one.
> 
> Doesn't the patch "xt_TCPMSS: don't allow netfilter --setmss to increase 
> mss" applied to 2.6.25 about a month ago already do this ?

I haven't followed that.  I'm running stable (2.6.23.14 atm) on most of 
my systems, my notebook is still on -rc8 for 2.6.24.  I've just had a 
specific problem that had a need to be scratched, but yes, based on the 
description you gave that would do _exactly_ that.

Random question: What happens with the case where we explicitly _want_ 
to break the MSS?  In other words, to set it to something insane like 
3000 in order to test other equipment.

Jaco
-
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [NETFILTER]: xt_TCPMSS: Consider reverse route's MTU in clamp-to-pmtu
  2008-01-24 10:49             ` Jaco Kroon
@ 2008-01-24 11:20               ` Jan Engelhardt
  0 siblings, 0 replies; 10+ messages in thread
From: Jan Engelhardt @ 2008-01-24 11:20 UTC (permalink / raw)
  To: Jaco Kroon
  Cc: Pascal Hambourg, Patrick McHardy,
	Netfilter Developer Mailing List


On Jan 24 2008 12:49, Jaco Kroon wrote:
>> 
>> Doesn't the patch "xt_TCPMSS: don't allow netfilter --setmss to increase mss"
>> applied to 2.6.25 about a month ago already do this ?
>
> I haven't followed that.  I'm running stable (2.6.23.14 atm) on most of my
> systems, my notebook is still on -rc8 for 2.6.24.  I've just had a specific
> problem that had a need to be scratched, but yes, based on the description you
> gave that would do _exactly_ that.
>
> Random question: What happens with the case where we explicitly _want_ to break
> the MSS?  In other words, to set it to something insane like 3000 in order to
> test other equipment.

Ideally, no TCPMSS would be needed as TCP does PMTUD itself,
similarly for UDP (though _you_ need to do it yourself there).

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [NETFILTER]: xt_TCPMSS: Consider reverse route's MTU in clamp-to-pmtu
  2008-01-24  9:58           ` Pascal Hambourg
  2008-01-24 10:49             ` Jaco Kroon
@ 2008-01-29 12:55             ` Patrick McHardy
  1 sibling, 0 replies; 10+ messages in thread
From: Patrick McHardy @ 2008-01-29 12:55 UTC (permalink / raw)
  To: Pascal Hambourg
  Cc: Jaco Kroon, Jan Engelhardt, Netfilter Developer Mailing List

Pascal Hambourg wrote:
> Hello,
> 
> Jaco Kroon a écrit :
>>
>> possibly add a "--clamp-to-mtu mtu_value" or "--clamp-to-mss 
>> mss_value" option (I'd prefer --clamp-to-mtu), which works like --set, 
>> but only if the new mss value is less than the existing one.
> 
> Doesn't the patch "xt_TCPMSS: don't allow netfilter --setmss to increase 
> mss" applied to 2.6.25 about a month ago already do this ?

It should.
-
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2008-01-29 13:11 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-23 20:57 [NETFILTER]: xt_TCPMSS: Consider reverse route's MTU in clamp-to-pmtu Jan Engelhardt
2008-01-23 22:29 ` Patrick McHardy
2008-01-23 22:43   ` Jan Engelhardt
2008-01-24  3:47     ` Jaco Kroon
2008-01-24  7:53       ` Patrick McHardy
2008-01-24  8:57         ` Jaco Kroon
2008-01-24  9:58           ` Pascal Hambourg
2008-01-24 10:49             ` Jaco Kroon
2008-01-24 11:20               ` Jan Engelhardt
2008-01-29 12:55             ` Patrick McHardy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.