* [PATCH net-next v2 3/3] ipv4: introduce hardened ip_no_pmtu_disc mode
@ 2014-01-06 8:48 Hannes Frederic Sowa
0 siblings, 0 replies; 2+ messages in thread
From: Hannes Frederic Sowa @ 2014-01-06 8:48 UTC (permalink / raw)
To: netdev; +Cc: eric.dumazet, davem, johnwheffner, fweimer
This new ip_no_pmtu_disc mode only allowes fragmentation-needed errors
to be honored by protocols which do more stringent validation on the
ICMP's packet payload. This knob is useful for people who e.g. want to
run an unmodified DNS server in a namespace where they need to use pmtu
for TCP connections (as they are used for zone transfers or fallback
for requests) but don't want to use possibly spoofed UDP pmtu information.
Currently the whitelisted protocols are TCP, SCTP and DCCP as they check
if the returned packet is in the window or if the association is valid.
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: John Heffner <johnwheffner@gmail.com>
Suggested-by: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
v2)
* reworded Documentation
Documentation/networking/ip-sysctl.txt | 13 ++++++++++++-
include/net/protocol.h | 7 ++++++-
net/dccp/ipv4.c | 1 +
net/ipv4/af_inet.c | 1 +
net/ipv4/icmp.c | 28 ++++++++++++++++++++++++----
net/sctp/protocol.c | 1 +
6 files changed, 45 insertions(+), 6 deletions(-)
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 2d164a3..3b230c5 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -26,7 +26,18 @@ ip_no_pmtu_disc - INTEGER
discarded. Outgoing frames are handled the same as in mode 1,
implicitly setting IP_PMTUDISC_DONT on every created socket.
- Possible values: 0-2
+ Mode 3 is a hardend pmtu discover mode. The kernel will only
+ accept fragmentation-needed errors if the underlying protocol
+ can verify them besides a plain socket lookup. Current
+ protocols for which pmtu events will be honored are TCP, SCTP
+ and DCCP as they verify e.g. the sequence number or the
+ association. This mode should not be enabled globally but is
+ only intended to secure e.g. name servers in namespaces where
+ TCP path mtu must still work but path MTU information of other
+ protocols should be discarded. If enabled globally this mode
+ could break other protocols.
+
+ Possible values: 0-3
Default: FALSE
min_pmtu - INTEGER
diff --git a/include/net/protocol.h b/include/net/protocol.h
index fbf7676..0e5f866 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -43,7 +43,12 @@ struct net_protocol {
int (*handler)(struct sk_buff *skb);
void (*err_handler)(struct sk_buff *skb, u32 info);
unsigned int no_policy:1,
- netns_ok:1;
+ netns_ok:1,
+ /* does the protocol do more stringent
+ * icmp tag validation than simple
+ * socket lookup?
+ */
+ icmp_strict_tag_validation:1;
};
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 88299c2..22b5d81 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -989,6 +989,7 @@ static const struct net_protocol dccp_v4_protocol = {
.err_handler = dccp_v4_err,
.no_policy = 1,
.netns_ok = 1,
+ .icmp_strict_tag_validation = 1,
};
static const struct proto_ops inet_dccp_ops = {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b8bc1a3..be1f553 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1539,6 +1539,7 @@ static const struct net_protocol tcp_protocol = {
.err_handler = tcp_v4_err,
.no_policy = 1,
.netns_ok = 1,
+ .icmp_strict_tag_validation = 1,
};
static const struct net_protocol udp_protocol = {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fb3c563..0134663 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -668,6 +668,16 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
rcu_read_unlock();
}
+static bool icmp_tag_validation(int proto)
+{
+ bool ok;
+
+ rcu_read_lock();
+ ok = rcu_dereference(inet_protos[proto])->icmp_strict_tag_validation;
+ rcu_read_unlock();
+ return ok;
+}
+
/*
* Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and
* ICMP_PARAMETERPROB.
@@ -705,12 +715,22 @@ static void icmp_unreach(struct sk_buff *skb)
case ICMP_PORT_UNREACH:
break;
case ICMP_FRAG_NEEDED:
- if (net->ipv4.sysctl_ip_no_pmtu_disc == 2) {
- goto out;
- } else if (net->ipv4.sysctl_ip_no_pmtu_disc) {
+ /* for documentation of the ip_no_pmtu_disc
+ * values please see
+ * Documentation/networking/ip-sysctl.txt
+ */
+ switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
+ default:
LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"),
&iph->daddr);
- } else {
+ break;
+ case 2:
+ goto out;
+ case 3:
+ if (!icmp_tag_validation(iph->protocol))
+ goto out;
+ /* fall through */
+ case 0:
info = ntohs(icmph->un.frag.mtu);
if (!info)
goto out;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 34b7726..7c16108 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1030,6 +1030,7 @@ static const struct net_protocol sctp_protocol = {
.err_handler = sctp_v4_err,
.no_policy = 1,
.netns_ok = 1,
+ .icmp_strict_tag_validation = 1,
};
/* IPv4 address related functions. */
--
1.8.4.2
^ permalink raw reply related [flat|nested] 2+ messages in thread* [PATCH net-next v4 0/3] path mtu hardening patches
@ 2014-01-09 9:01 hannes
2014-01-09 9:01 ` [PATCH net-next v2 3/3] ipv4: introduce hardened ip_no_pmtu_disc mode hannes
0 siblings, 1 reply; 2+ messages in thread
From: hannes @ 2014-01-09 9:01 UTC (permalink / raw)
To: netdev; +Cc: eric.dumazet, davem, johnwheffner, steffen.klassert, fweimer
Hi all!
After a lot of back and forth I want to propose these changes regarding
path mtu hardening and give an outline why I think this is the best way
how to proceed:
This set contains the following patches:
* ipv4: introduce ip_dst_mtu_maybe_forward and protect forwarding path against pmtu spoofing
* ipv6: introduce ip6_dst_mtu_forward and protect forwarding path with it
* ipv4: introduce hardened ip_no_pmtu_disc mode
The first one switches the forwarding path of IPv4 to use the interface
mtu by default and ignore a possible discovered path mtu. It provides
a sysctl to switch back to the original behavior (see discussion below).
The second patch does the same thing unconditionally for IPv6. I don't
provide a knob for IPv6 to switch to original behavior (please see
below).
The third patch introduces a hardened pmtu mode, where only pmtu
information are accepted where the protocol is able to do more stringent
checks on the icmp piggyback payload (please see the patch commit msg
for further details).
Why is this change necessary?
First of all, RFC 1191 4. Router specification says:
"When a router is unable to forward a datagram because it exceeds the
MTU of the next-hop network and its Don't Fragment bit is set, the
router is required to return an ICMP Destination Unreachable message
to the source of the datagram, with the Code indicating
"fragmentation needed and DF set". ..."
For some time now fragmentation has been considered problematic, e.g.:
* http://www.hpl.hp.com/techreports/Compaq-DEC/WRL-87-3.pdf
* http://tools.ietf.org/search/rfc4963
Most of them seem to agree that fragmentation should be avoided because
of efficiency, data corruption or security concerns.
Recently it was shown possible that correctly guessing IP ids could lead
to data injection on DNS packets:
<https://sites.google.com/site/hayashulman/files/fragmentation-poisoning.pdf>
While we can try to completly stop fragmentation on the end host
(this is e.g. implemented via IP_PMTUDISC_INTERFACE), we cannot stop
fragmentation completly on the forwarding path. On the end host the
application has to deal with MTUs and has to choose fallback methods
if fragmentation could be an attack vector. This is already the case for
most DNS software, where a maximum UDP packet size can be configured. But
until recently they had no control over local fragmentation and could
thus emit fragmented packets.
On the forwarding path we can just try to delay the fragmentation to
the last hop where this is really necessary. Current kernel already does
that but only because routers don't receive feedback of path mtus, these are
only send back to the end host system. But it is possible to maliciously
insert path mtu inforamtion via ICMP packets which have an icmp echo_reply
payload, because we cannot validate those notifications against local
sockets. DHCP clients which establish an any-bound RAW-socket could also
start processing unwanted fragmentation-needed packets.
Why does IPv4 has a knob to revert to old behavior while IPv6 doesn't?
IPv4 does fragmentation on the path while IPv6 does always respond with
packet-too-big errors. The interface MTU will always be greater than
the path MTU information. So we would discard packets we could actually
forward because of malicious information. After this change we would
let the hop, which really could not forward the packet, notify the host
of this problem.
IPv4 allowes fragmentation mid-path. In case someone does use a software
which tries to discover such paths and assumes that the kernel is handling
the discovered pmtu information automatically. This should be an extremly
rare case, but because I could not exclude the possibility this knob is
provided. Also this software could insert non-locked mtu information
into the kernel. We cannot distinguish that from path mtu information
currently. Premature fragmentation could solve some problems in wrongly
configured networks, thus this switch is provided.
One frag-needed packet could reduce the path mtu down to 522 bytes
(route/min_pmtu).
Misc:
IPv6 neighbor discovery could advertise mtu information for an
interface. These information update the ipv6-specific interface mtu and
thus get used by the forwarding path.
Tunnel and xfrm output path will still honour path mtu and also respond
with Packet-too-Big or fragmentation-needed errors if needed.
Changelog for all patches:
# ipv4: introduce ip_dst_mtu_maybe_forward and protect forwarding path against pmtu spoofing
v2)
* enabled ip_forward_use_pmtu by default
* reworded
v3)
* disabled ip_forward_use_pmtu by default
* reworded
v4)
* renamed ip_dst_mtu_secure to ip_dst_mtu_maybe_forward
* updated changelog accordingly
* removed unneeded !!(... & ...) double negations
# ipv6: introduce ip6_dst_mtu_forward and protect forwarding path with it
v2)
* by default we honour pmtu information
3)
* only honor interface mtu
* rewritten and simplified
* no knob to fall back to old mode any more
# ipv4: introduce hardened ip_no_pmtu_disc mode
v2)
* reworded Documentation
Diffstat:
Documentation/networking/ip-sysctl.txt | 26 +++++++++++++++++++++++++-
include/net/ip.h | 33 +++++++++++++++++++++++++++++++++
include/net/netns/ipv4.h | 1 +
include/net/protocol.h | 7 ++++++-
include/net/route.h | 19 +++----------------
net/dccp/ipv4.c | 1 +
net/ipv4/af_inet.c | 1 +
net/ipv4/icmp.c | 28 ++++++++++++++++++++++++----
net/ipv4/ip_forward.c | 7 +++++--
net/ipv4/ip_output.c | 8 +++++---
net/ipv4/route.c | 3 ---
net/ipv4/sysctl_net_ipv4.c | 7 +++++++
net/ipv6/ip6_output.c | 23 ++++++++++++++++++++++-
net/sctp/protocol.c | 1 +
14 files changed, 134 insertions(+), 31 deletions(-)
Thanks,
Hannes
^ permalink raw reply [flat|nested] 2+ messages in thread
* [PATCH net-next v2 3/3] ipv4: introduce hardened ip_no_pmtu_disc mode
2014-01-09 9:01 [PATCH net-next v4 0/3] path mtu hardening patches hannes
@ 2014-01-09 9:01 ` hannes
0 siblings, 0 replies; 2+ messages in thread
From: hannes @ 2014-01-09 9:01 UTC (permalink / raw)
To: netdev
Cc: eric.dumazet, davem, johnwheffner, steffen.klassert, fweimer,
Hannes Frederic Sowa
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
This new ip_no_pmtu_disc mode only allowes fragmentation-needed errors
to be honored by protocols which do more stringent validation on the
ICMP's packet payload. This knob is useful for people who e.g. want to
run an unmodified DNS server in a namespace where they need to use pmtu
for TCP connections (as they are used for zone transfers or fallback
for requests) but don't want to use possibly spoofed UDP pmtu information.
Currently the whitelisted protocols are TCP, SCTP and DCCP as they check
if the returned packet is in the window or if the association is valid.
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: John Heffner <johnwheffner@gmail.com>
Suggested-by: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
Documentation/networking/ip-sysctl.txt | 13 ++++++++++++-
include/net/protocol.h | 7 ++++++-
net/dccp/ipv4.c | 1 +
net/ipv4/af_inet.c | 1 +
net/ipv4/icmp.c | 28 ++++++++++++++++++++++++----
net/sctp/protocol.c | 1 +
6 files changed, 45 insertions(+), 6 deletions(-)
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 0d71fa9..c97932c 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -26,7 +26,18 @@ ip_no_pmtu_disc - INTEGER
discarded. Outgoing frames are handled the same as in mode 1,
implicitly setting IP_PMTUDISC_DONT on every created socket.
- Possible values: 0-2
+ Mode 3 is a hardend pmtu discover mode. The kernel will only
+ accept fragmentation-needed errors if the underlying protocol
+ can verify them besides a plain socket lookup. Current
+ protocols for which pmtu events will be honored are TCP, SCTP
+ and DCCP as they verify e.g. the sequence number or the
+ association. This mode should not be enabled globally but is
+ only intended to secure e.g. name servers in namespaces where
+ TCP path mtu must still work but path MTU information of other
+ protocols should be discarded. If enabled globally this mode
+ could break other protocols.
+
+ Possible values: 0-3
Default: FALSE
min_pmtu - INTEGER
diff --git a/include/net/protocol.h b/include/net/protocol.h
index fbf7676..0e5f866 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -43,7 +43,12 @@ struct net_protocol {
int (*handler)(struct sk_buff *skb);
void (*err_handler)(struct sk_buff *skb, u32 info);
unsigned int no_policy:1,
- netns_ok:1;
+ netns_ok:1,
+ /* does the protocol do more stringent
+ * icmp tag validation than simple
+ * socket lookup?
+ */
+ icmp_strict_tag_validation:1;
};
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 88299c2..22b5d81 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -989,6 +989,7 @@ static const struct net_protocol dccp_v4_protocol = {
.err_handler = dccp_v4_err,
.no_policy = 1,
.netns_ok = 1,
+ .icmp_strict_tag_validation = 1,
};
static const struct proto_ops inet_dccp_ops = {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6268a47..ecd2c3f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1545,6 +1545,7 @@ static const struct net_protocol tcp_protocol = {
.err_handler = tcp_v4_err,
.no_policy = 1,
.netns_ok = 1,
+ .icmp_strict_tag_validation = 1,
};
static const struct net_protocol udp_protocol = {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fb3c563..0134663 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -668,6 +668,16 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
rcu_read_unlock();
}
+static bool icmp_tag_validation(int proto)
+{
+ bool ok;
+
+ rcu_read_lock();
+ ok = rcu_dereference(inet_protos[proto])->icmp_strict_tag_validation;
+ rcu_read_unlock();
+ return ok;
+}
+
/*
* Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and
* ICMP_PARAMETERPROB.
@@ -705,12 +715,22 @@ static void icmp_unreach(struct sk_buff *skb)
case ICMP_PORT_UNREACH:
break;
case ICMP_FRAG_NEEDED:
- if (net->ipv4.sysctl_ip_no_pmtu_disc == 2) {
- goto out;
- } else if (net->ipv4.sysctl_ip_no_pmtu_disc) {
+ /* for documentation of the ip_no_pmtu_disc
+ * values please see
+ * Documentation/networking/ip-sysctl.txt
+ */
+ switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
+ default:
LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"),
&iph->daddr);
- } else {
+ break;
+ case 2:
+ goto out;
+ case 3:
+ if (!icmp_tag_validation(iph->protocol))
+ goto out;
+ /* fall through */
+ case 0:
info = ntohs(icmph->un.frag.mtu);
if (!info)
goto out;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 34b7726..7c16108 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1030,6 +1030,7 @@ static const struct net_protocol sctp_protocol = {
.err_handler = sctp_v4_err,
.no_policy = 1,
.netns_ok = 1,
+ .icmp_strict_tag_validation = 1,
};
/* IPv4 address related functions. */
--
1.8.4.2
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2014-01-09 9:01 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-01-06 8:48 [PATCH net-next v2 3/3] ipv4: introduce hardened ip_no_pmtu_disc mode Hannes Frederic Sowa
-- strict thread matches above, loose matches on Subject: below --
2014-01-09 9:01 [PATCH net-next v4 0/3] path mtu hardening patches hannes
2014-01-09 9:01 ` [PATCH net-next v2 3/3] ipv4: introduce hardened ip_no_pmtu_disc mode hannes
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).