public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] ipv6: Implement limits on extension header parsing
@ 2026-04-17 17:18 Daniel Borkmann
  2026-04-17 21:45 ` Eric Dumazet
  2026-04-18 11:45 ` Justin Iurman
  0 siblings, 2 replies; 10+ messages in thread
From: Daniel Borkmann @ 2026-04-17 17:18 UTC (permalink / raw)
  To: kuba; +Cc: edumazet, dsahern, tom, willemdebruijn.kernel, idosch, pabeni,
	netdev

ipv6_{skip_exthdr,find_hdr}() and ip6_tnl_parse_tlv_enc_lim() iterate
over IPv6 extension headers until they find a non-extension-header
protocol or run out of packet data. The loops have no iteration counter,
relying solely on the packet length to bound them. For a crafted packet
with 8-byte extension headers filling a 64KB jumbogram, this means a
worst case of up to ~8k iterations with a skb_header_pointer call each.
ipv6_skip_exthdr(), for example, is used to parse the inner quoted
packet inside an incoming ICMPv6 error:

  - icmpv6_rcv
    - checksum validation
    - case ICMPV6_DEST_UNREACH
      - icmpv6_notify
        - pskb_may_pull()       <- pull inner IPv6 header
        - ipv6_skip_exthdr()    <- iterates here
        - pskb_may_pull()
        - ipprot->err_handler() <- sk lookup (matching sk not required)

The per-iteration cost of ipv6_skip_exthdr() itself is generally light,
but skb_header_pointer() becomes more costly on reassembled packets: the
first ~1KB of the inner packet is in the skb's linear area, but the
remaining ~63KB is in the frag_list, where skb_copy_bits() is needed to
read the data.

Add a configurable limit via a new sysctl net.ipv6.max_ext_hdrs_number
(default 32, minimum 1). All three extension header walking functions
are bounded by this limit. The sysctl is in line with commit 47d3d7ac656a
("ipv6: Implement limits on Hop-by-Hop and Destination options"). The
value is read from init_net, since plumbing a struct net * through all
helpers would touch a lot of callsites.

There is an ongoing IETF draft, draft-ietf-6man-eh-limits-18, which
states that 8 extension headers before the transport header is the
baseline that routers MUST handle; section 7 also details why limits
are needed.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 Documentation/networking/ip-sysctl.rst |  7 +++++++
 include/net/ipv6.h                     |  2 ++
 include/net/netns/ipv6.h               |  1 +
 net/ipv6/af_inet6.c                    |  1 +
 net/ipv6/exthdrs_core.c                | 11 +++++++++++
 net/ipv6/ip6_tunnel.c                  |  5 +++++
 net/ipv6/sysctl_net_ipv6.c             |  8 ++++++++
 7 files changed, 35 insertions(+)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 6921d8594b84..4559a956bbd9 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -2503,6 +2503,13 @@ max_hbh_length - INTEGER
 
 	Default: INT_MAX (unlimited)
 
+max_ext_hdrs_number - INTEGER
+	Maximum number of IPv6 extension headers allowed in a packet.
+	Limits how many extension headers will be traversed. The value
+	is read from the initial netns.
+
+	Default: 32
+
 skip_notify_on_dev_down - BOOLEAN
 	Controls whether an RTM_DELROUTE message is generated for routes
 	removed when a device is taken down or deleted. IPv4 does not
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 53c5056508be..d7f0d55e6918 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -90,6 +90,8 @@ struct ip_tunnel_info;
 #define IP6_DEFAULT_MAX_DST_OPTS_LEN	 INT_MAX /* No limit */
 #define IP6_DEFAULT_MAX_HBH_OPTS_LEN	 INT_MAX /* No limit */
 
+#define IP6_DEFAULT_MAX_EXT_HDRS_CNT	 32
+
 /*
  *	Addr type
  *	
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 34bdb1308e8f..5be4dd1c9ae8 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -54,6 +54,7 @@ struct netns_sysctl_ipv6 {
 	int max_hbh_opts_cnt;
 	int max_dst_opts_len;
 	int max_hbh_opts_len;
+	int max_ext_hdrs_cnt;
 	int seg6_flowlabel;
 	u32 ioam6_id;
 	u64 ioam6_id_wide;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 4cbd45b68088..ed7fe6e4a6bd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -965,6 +965,7 @@ static int __net_init inet6_net_init(struct net *net)
 	net->ipv6.sysctl.flowlabel_state_ranges = 0;
 	net->ipv6.sysctl.max_dst_opts_cnt = IP6_DEFAULT_MAX_DST_OPTS_CNT;
 	net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT;
+	net->ipv6.sysctl.max_ext_hdrs_cnt = IP6_DEFAULT_MAX_EXT_HDRS_CNT;
 	net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN;
 	net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN;
 	net->ipv6.sysctl.fib_notify_on_flag_change = 0;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 49e31e4ae7b7..917307877cbb 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -4,6 +4,8 @@
  * not configured or static.
  */
 #include <linux/export.h>
+
+#include <net/net_namespace.h>
 #include <net/ipv6.h>
 
 /*
@@ -72,7 +74,9 @@ EXPORT_SYMBOL(ipv6_ext_hdr);
 int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
 		     __be16 *frag_offp)
 {
+	int exthdr_max = READ_ONCE(init_net.ipv6.sysctl.max_ext_hdrs_cnt);
 	u8 nexthdr = *nexthdrp;
+	int exthdr_cnt = 0;
 
 	*frag_offp = 0;
 
@@ -80,6 +84,8 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
 		struct ipv6_opt_hdr _hdr, *hp;
 		int hdrlen;
 
+		if (unlikely(exthdr_cnt++ >= exthdr_max))
+			return -1;
 		if (nexthdr == NEXTHDR_NONE)
 			return -1;
 		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
@@ -188,8 +194,10 @@ EXPORT_SYMBOL_GPL(ipv6_find_tlv);
 int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
 		  int target, unsigned short *fragoff, int *flags)
 {
+	int exthdr_max = READ_ONCE(init_net.ipv6.sysctl.max_ext_hdrs_cnt);
 	unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
 	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+	int exthdr_cnt = 0;
 	bool found;
 
 	if (fragoff)
@@ -216,6 +224,9 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
 			return -ENOENT;
 		}
 
+		if (unlikely(exthdr_cnt++ >= exthdr_max))
+			return -EBADMSG;
+
 		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
 		if (!hp)
 			return -EBADMSG;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 0b53488a9229..78e849e167ca 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -396,15 +396,20 @@ ip6_tnl_dev_uninit(struct net_device *dev)
 
 __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
 {
+	int exthdr_max = READ_ONCE(init_net.ipv6.sysctl.max_ext_hdrs_cnt);
 	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
 	unsigned int nhoff = raw - skb->data;
 	unsigned int off = nhoff + sizeof(*ipv6h);
 	u8 nexthdr = ipv6h->nexthdr;
+	int exthdr_cnt = 0;
 
 	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
 		struct ipv6_opt_hdr *hdr;
 		u16 optlen;
 
+		if (unlikely(exthdr_cnt++ >= exthdr_max))
+			break;
+
 		if (!pskb_may_pull(skb, off + sizeof(*hdr)))
 			break;
 
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index d2cd33e2698d..93f865545a7c 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -135,6 +135,14 @@ static struct ctl_table ipv6_table_template[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= &flowlabel_reflect_max,
 	},
+	{
+		.procname	= "max_ext_hdrs_number",
+		.data		= &init_net.ipv6.sysctl.max_ext_hdrs_cnt,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
+	},
 	{
 		.procname	= "max_dst_opts_number",
 		.data		= &init_net.ipv6.sysctl.max_dst_opts_cnt,
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2026-04-18 14:15 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-17 17:18 [PATCH net] ipv6: Implement limits on extension header parsing Daniel Borkmann
2026-04-17 21:45 ` Eric Dumazet
2026-04-18 11:45 ` Justin Iurman
2026-04-18 12:26   ` Daniel Borkmann
2026-04-18 12:50     ` Justin Iurman
2026-04-18 12:59       ` Daniel Borkmann
2026-04-18 13:18         ` Justin Iurman
2026-04-18 13:15       ` Eric Dumazet
2026-04-18 13:46         ` Justin Iurman
2026-04-18 14:15           ` Justin Iurman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox