public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
* [net,v3] ipv6: shorten reassembly timeout under fragment memory pressure
       [not found]     ` <CGME20260211030048epcms1p54c6ed78458f57def8e3163032498ca00@epcms1p7>
@ 2026-02-11 10:19       ` 배석진
  0 siblings, 0 replies; 7+ messages in thread
From: 배석진 @ 2026-02-11 10:19 UTC (permalink / raw)
  To: netdev@vger.kernel.org
  Cc: 배석진, netfilter-devel@vger.kernel.org,
	davem@davemloft.net, edumazet@google.com, pabeni@redhat.com,
	dsahern@kernel.org, kuba@kernel.org, horms@kernel.org,
	phil@nwl.cc, coreteam@netfilter.org, fw@strlen.de,
	pablo@netfilter.org

 Changes in v2:
- Fix build bot error and warnings



From c7940e3dd728fdc58c8199bc031bf3f8f1e8a20f Mon Sep 17 00:00:00 2001
From: Soukjin Bae <soukjin.bae@samsung.com>
Date: Wed, 11 Feb 2026 11:20:23 +0900
Subject: [PATCH] ipv6: shorten reassembly timeout under fragment memory
 pressure

Under heavy IPv6 fragmentation, incomplete fragment queues may persist
for the full reassembly timeout even when fragment memory is under
pressure.

This can lead to prolonged retention of fragment queues that are unlikely
to complete, causing newly arriving fragmented packets to be dropped due
to memory exhaustion.

Introduce an optional mechanism to shorten the IPv6 reassembly timeout
when fragment memory usage exceeds the low threshold. Different timeout
values are applied depending on the upper-layer protocol to balance
eviction speed and completion probability.

Signed-off-by: Soukjin Bae <soukjin.bae@samsung.com>
---
 MAINTAINERS                             |  5 ++
 include/net/inet_frag.h                 |  4 ++
 include/net/ip6_reasm_policy.h          | 17 +++++
 net/ipv6/Kconfig                        | 10 +++
 net/ipv6/netfilter/nf_conntrack_reasm.c | 29 +++++++++
 net/ipv6/reassembly.c                   | 82 +++++++++++++++++++++++++
 6 files changed, 147 insertions(+)
 create mode 100644 include/net/ip6_reasm_policy.h

diff --git a/MAINTAINERS b/MAINTAINERS
index e08767323763..dacaf07080e6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -28893,3 +28893,8 @@ S:	Buried alive in reporters
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
 F:	*
 F:	*/
+
+ADJUSTABLE FRAGMENT TIMER
+M:	Soukjin Bae <soukjin.bae@samsung.com>
+S:	Maintained
+F:	include/net/ip6_reasm_policy.h
\ No newline at end of file
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 365925c9d262..0fc88ef61ca3 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -15,6 +15,10 @@ struct fqdir {
 	long			high_thresh;
 	long			low_thresh;
 	int			timeout;
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+	int			timeout_failed_tcp;
+	int			timeout_failed_udp;
+#endif
 	int			max_dist;
 	struct inet_frags	*f;
 	struct net		*net;
diff --git a/include/net/ip6_reasm_policy.h b/include/net/ip6_reasm_policy.h
new file mode 100644
index 000000000000..994482a03bc0
--- /dev/null
+++ b/include/net/ip6_reasm_policy.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_IP6_REASM_POLICY_H
+#define _NET_IP6_REASM_POLICY_H
+
+struct sk_buff;
+struct frag_queue;
+
+void ip6_reasm_adjust_timer(struct frag_queue *fq,
+			    struct sk_buff *skb, int nhoff);
+
+/*
+ * Default IPv6 reassembly timeouts under fragment memory pressure
+ */
+#define IPV6_REASM_TIMEOUT_FAILED_TCP	3	/* 3 seconds */
+#define IPV6_REASM_TIMEOUT_FAILED_UDP	1	/* 1 second */
+
+#endif /* _NET_IP6_REASM_POLICY_H */
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index b8f9a8c0302e..6e8db60f6a4d 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -340,4 +340,14 @@ config IPV6_IOAM6_LWTUNNEL
 
 	  If unsure, say N.
 
+config IPV6_FRAG_TIMER_ADJ
+	bool "IPv6: Adjust reassembly timer on buffer starvation"
+	default n
+	help
+	  Enable dynamic adjustment of the IPv6 reassembly timer when the
+	  fragment memory usage exceeds the low threshold. This helps to
+	  quickly evict incomplete fragment queues, making room for new
+	  incoming fragments such as latency-sensitive IMS traffic.
+	  If unsure, say N.
+
 endif # IPV6
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 64ab23ff559b..39902fbd53aa 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -30,6 +30,7 @@
 #include <linux/module.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #include <net/netns/generic.h>
+#include <net/ip6_reasm_policy.h>
 
 static const char nf_frags_cache_name[] = "nf-frags";
 
@@ -62,6 +63,20 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
 	},
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+	{
+		.procname	= "nf_conntrack_frag6_timeout_failed_tcp",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "nf_conntrack_frag6_timeout_failed_udp",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+#endif
 };
 
 static int nf_ct_frag6_sysctl_register(struct net *net)
@@ -85,6 +100,10 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
 	table[1].extra2	= &nf_frag->fqdir->high_thresh;
 	table[2].data	= &nf_frag->fqdir->high_thresh;
 	table[2].extra1	= &nf_frag->fqdir->low_thresh;
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+	table[3].data	= &nf_frag->fqdir->timeout_failed_tcp;
+	table[4].data	= &nf_frag->fqdir->timeout_failed_udp;
+#endif
 
 	hdr = register_net_sysctl_sz(net, "net/netfilter", table,
 				     ARRAY_SIZE(nf_ct_frag6_sysctl_table));
@@ -214,6 +233,10 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 		}
 		fq->q.flags |= INET_FRAG_LAST_IN;
 		fq->q.len = end;
+
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+		ip6_reasm_adjust_timer(fq, skb, nhoff);
+#endif
 	} else {
 		/* Check if the fragment is rounded to 8 bytes.
 		 * Required by the RFC.
@@ -513,6 +536,12 @@ static int nf_ct_net_init(struct net *net)
 	nf_frag->fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
 	nf_frag->fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
 	nf_frag->fqdir->timeout = IPV6_FRAG_TIMEOUT;
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+	nf_frag->fqdir->timeout_failed_tcp =
+		IPV6_REASM_TIMEOUT_FAILED_TCP * HZ;
+	nf_frag->fqdir->timeout_failed_udp =
+		IPV6_REASM_TIMEOUT_FAILED_UDP * HZ;
+#endif
 
 	res = nf_ct_frag6_sysctl_register(net);
 	if (res < 0)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 25ec8001898d..cdec65461d81 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -57,6 +57,7 @@
 #include <net/addrconf.h>
 #include <net/ipv6_frag.h>
 #include <net/inet_ecn.h>
+#include <net/ip6_reasm_policy.h>
 
 static const char ip6_frag_cache_name[] = "ip6-frags";
 
@@ -104,6 +105,59 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
 	return container_of(q, struct frag_queue, q);
 }
 
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+static u8 ip6_reasm_get_l4proto(struct sk_buff *skb, int nhoff)
+{
+	struct frag_hdr _fhdr, *fhdr;
+	__be16 frag_off;
+	int offset;
+	u8 nexthdr;
+
+	fhdr = skb_header_pointer(skb, nhoff, sizeof(_fhdr), &_fhdr);
+	if (!fhdr)
+		return IPPROTO_NONE;
+
+	nexthdr = fhdr->nexthdr;
+	offset = nhoff + sizeof(struct frag_hdr);
+
+	/* Skip extension headers after fragment header */
+	if (ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off) < 0)
+		return IPPROTO_NONE;
+
+	return nexthdr;
+}
+
+/**
+ * ip6_reasm_adjust_timer - adjust IPv6 reassembly timer under memory pressure
+ * @fq: fragment queue
+ * @skb: current fragment skb
+ * @nhoff: offset to fragment header
+ *
+ * Shortens reassembly timeout on buffer starvation to
+ * allow faster eviction of incomplete fragment queues.
+ */
+void ip6_reasm_adjust_timer(struct frag_queue *fq,
+			    struct sk_buff *skb, int nhoff)
+{
+	u8 l4proto;
+	unsigned long new_timer;
+
+	if (frag_mem_limit(fq->q.fqdir) < fq->q.fqdir->low_thresh)
+		return;
+
+	l4proto = ip6_reasm_get_l4proto(skb, nhoff);
+
+	if (l4proto == IPPROTO_TCP || l4proto == IPPROTO_ESP)
+		new_timer = fq->q.fqdir->timeout_failed_tcp;
+	else
+		new_timer = fq->q.fqdir->timeout_failed_udp;
+
+	if (time_after(fq->q.timer.expires, jiffies + new_timer))
+		mod_timer(&fq->q.timer, jiffies + new_timer);
+}
+EXPORT_SYMBOL_GPL(ip6_reasm_adjust_timer);
+#endif
+
 static int ip6_frag_queue(struct net *net,
 			  struct frag_queue *fq, struct sk_buff *skb,
 			  struct frag_hdr *fhdr, int nhoff,
@@ -154,6 +208,10 @@ static int ip6_frag_queue(struct net *net,
 			goto discard_fq;
 		fq->q.flags |= INET_FRAG_LAST_IN;
 		fq->q.len = end;
+
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+		ip6_reasm_adjust_timer(fq, skb, nhoff);
+#endif
 	} else {
 		/* Check if the fragment is rounded to 8 bytes.
 		 * Required by the RFC.
@@ -437,6 +495,20 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+	{
+		.procname	= "ip6frag_timeout_failed_tcp",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "ip6frag_timeout_failed_udp",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+#endif
 };
 
 /* secret interval has been deprecated */
@@ -468,6 +540,10 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
 	table[1].data	= &net->ipv6.fqdir->low_thresh;
 	table[1].extra2	= &net->ipv6.fqdir->high_thresh;
 	table[2].data	= &net->ipv6.fqdir->timeout;
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+	table[3].data	= &net->ipv6.fqdir->timeout_failed_tcp;
+	table[4].data	= &net->ipv6.fqdir->timeout_failed_udp;
+#endif
 
 	hdr = register_net_sysctl_sz(net, "net/ipv6", table,
 				     ARRAY_SIZE(ip6_frags_ns_ctl_table));
@@ -538,6 +614,12 @@ static int __net_init ipv6_frags_init_net(struct net *net)
 	net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
 	net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
 	net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT;
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+	net->ipv6.fqdir->timeout_failed_tcp =
+		IPV6_REASM_TIMEOUT_FAILED_TCP * HZ;
+	net->ipv6.fqdir->timeout_failed_udp =
+		IPV6_REASM_TIMEOUT_FAILED_UDP * HZ;
+#endif
 
 	res = ip6_frags_ns_sysctl_register(net);
 	if (res < 0)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [net-next,v3] ipv6: shorten reassembly timeout under fragment memory pressure
       [not found] <CGME20260211030048epcms1p54c6ed78458f57def8e3163032498ca00@epcms1p2>
@ 2026-02-11 10:32 ` 배석진
  2026-02-11 15:10   ` Fernando Fernandez Mancera
  0 siblings, 1 reply; 7+ messages in thread
From: 배석진 @ 2026-02-11 10:32 UTC (permalink / raw)
  To: netdev@vger.kernel.org
  Cc: 배석진, netfilter-devel@vger.kernel.org,
	davem@davemloft.net, edumazet@google.com, pabeni@redhat.com,
	dsahern@kernel.org, kuba@kernel.org, horms@kernel.org,
	phil@nwl.cc, coreteam@netfilter.org, fw@strlen.de,
	pablo@netfilter.org

 Changes in v3:
- Fix build bot error and warnings
- baseline update



From c7940e3dd728fdc58c8199bc031bf3f8f1e8a20f Mon Sep 17 00:00:00 2001
From: Soukjin Bae <soukjin.bae@samsung.com>
Date: Wed, 11 Feb 2026 11:20:23 +0900
Subject: [PATCH] ipv6: shorten reassembly timeout under fragment memory
 pressure

Under heavy IPv6 fragmentation, incomplete fragment queues may persist
for the full reassembly timeout even when fragment memory is under
pressure.

This can lead to prolonged retention of fragment queues that are unlikely
to complete, causing newly arriving fragmented packets to be dropped due
to memory exhaustion.

Introduce an optional mechanism to shorten the IPv6 reassembly timeout
when fragment memory usage exceeds the low threshold. Different timeout
values are applied depending on the upper-layer protocol to balance
eviction speed and completion probability.

Signed-off-by: Soukjin Bae <soukjin.bae@samsung.com>
---
 MAINTAINERS                             |  5 ++
 include/net/inet_frag.h                 |  4 ++
 include/net/ip6_reasm_policy.h          | 17 +++++
 net/ipv6/Kconfig                        | 10 +++
 net/ipv6/netfilter/nf_conntrack_reasm.c | 29 +++++++++
 net/ipv6/reassembly.c                   | 82 +++++++++++++++++++++++++
 6 files changed, 147 insertions(+)
 create mode 100644 include/net/ip6_reasm_policy.h

diff --git a/MAINTAINERS b/MAINTAINERS
index e08767323763..dacaf07080e6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -28893,3 +28893,8 @@ S:	Buried alive in reporters
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
 F:	*
 F:	*/
+
+ADJUSTABLE FRAGMENT TIMER
+M:	Soukjin Bae <soukjin.bae@samsung.com>
+S:	Maintained
+F:	include/net/ip6_reasm_policy.h
\ No newline at end of file
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 365925c9d262..0fc88ef61ca3 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -15,6 +15,10 @@ struct fqdir {
 	long			high_thresh;
 	long			low_thresh;
 	int			timeout;
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+	int			timeout_failed_tcp;
+	int			timeout_failed_udp;
+#endif
 	int			max_dist;
 	struct inet_frags	*f;
 	struct net		*net;
diff --git a/include/net/ip6_reasm_policy.h b/include/net/ip6_reasm_policy.h
new file mode 100644
index 000000000000..994482a03bc0
--- /dev/null
+++ b/include/net/ip6_reasm_policy.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_IP6_REASM_POLICY_H
+#define _NET_IP6_REASM_POLICY_H
+
+struct sk_buff;
+struct frag_queue;
+
+void ip6_reasm_adjust_timer(struct frag_queue *fq,
+			    struct sk_buff *skb, int nhoff);
+
+/*
+ * Default IPv6 reassembly timeouts under fragment memory pressure
+ */
+#define IPV6_REASM_TIMEOUT_FAILED_TCP	3	/* 3 seconds */
+#define IPV6_REASM_TIMEOUT_FAILED_UDP	1	/* 1 second */
+
+#endif /* _NET_IP6_REASM_POLICY_H */
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index b8f9a8c0302e..6e8db60f6a4d 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -340,4 +340,14 @@ config IPV6_IOAM6_LWTUNNEL
 
 	  If unsure, say N.
 
+config IPV6_FRAG_TIMER_ADJ
+	bool "IPv6: Adjust reassembly timer on buffer starvation"
+	default n
+	help
+	  Enable dynamic adjustment of the IPv6 reassembly timer when the
+	  fragment memory usage exceeds the low threshold. This helps to
+	  quickly evict incomplete fragment queues, making room for new
+	  incoming fragments such as latency-sensitive IMS traffic.
+	  If unsure, say N.
+
 endif # IPV6
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 64ab23ff559b..39902fbd53aa 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -30,6 +30,7 @@
 #include <linux/module.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #include <net/netns/generic.h>
+#include <net/ip6_reasm_policy.h>
 
 static const char nf_frags_cache_name[] = "nf-frags";
 
@@ -62,6 +63,20 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
 	},
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+	{
+		.procname	= "nf_conntrack_frag6_timeout_failed_tcp",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "nf_conntrack_frag6_timeout_failed_udp",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+#endif
 };
 
 static int nf_ct_frag6_sysctl_register(struct net *net)
@@ -85,6 +100,10 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
 	table[1].extra2	= &nf_frag->fqdir->high_thresh;
 	table[2].data	= &nf_frag->fqdir->high_thresh;
 	table[2].extra1	= &nf_frag->fqdir->low_thresh;
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+	table[3].data	= &nf_frag->fqdir->timeout_failed_tcp;
+	table[4].data	= &nf_frag->fqdir->timeout_failed_udp;
+#endif
 
 	hdr = register_net_sysctl_sz(net, "net/netfilter", table,
 				     ARRAY_SIZE(nf_ct_frag6_sysctl_table));
@@ -214,6 +233,10 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 		}
 		fq->q.flags |= INET_FRAG_LAST_IN;
 		fq->q.len = end;
+
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+		ip6_reasm_adjust_timer(fq, skb, nhoff);
+#endif
 	} else {
 		/* Check if the fragment is rounded to 8 bytes.
 		 * Required by the RFC.
@@ -513,6 +536,12 @@ static int nf_ct_net_init(struct net *net)
 	nf_frag->fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
 	nf_frag->fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
 	nf_frag->fqdir->timeout = IPV6_FRAG_TIMEOUT;
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+	nf_frag->fqdir->timeout_failed_tcp =
+		IPV6_REASM_TIMEOUT_FAILED_TCP * HZ;
+	nf_frag->fqdir->timeout_failed_udp =
+		IPV6_REASM_TIMEOUT_FAILED_UDP * HZ;
+#endif
 
 	res = nf_ct_frag6_sysctl_register(net);
 	if (res < 0)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 25ec8001898d..cdec65461d81 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -57,6 +57,7 @@
 #include <net/addrconf.h>
 #include <net/ipv6_frag.h>
 #include <net/inet_ecn.h>
+#include <net/ip6_reasm_policy.h>
 
 static const char ip6_frag_cache_name[] = "ip6-frags";
 
@@ -104,6 +105,59 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
 	return container_of(q, struct frag_queue, q);
 }
 
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+static u8 ip6_reasm_get_l4proto(struct sk_buff *skb, int nhoff)
+{
+	struct frag_hdr _fhdr, *fhdr;
+	__be16 frag_off;
+	int offset;
+	u8 nexthdr;
+
+	fhdr = skb_header_pointer(skb, nhoff, sizeof(_fhdr), &_fhdr);
+	if (!fhdr)
+		return IPPROTO_NONE;
+
+	nexthdr = fhdr->nexthdr;
+	offset = nhoff + sizeof(struct frag_hdr);
+
+	/* Skip extension headers after fragment header */
+	if (ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off) < 0)
+		return IPPROTO_NONE;
+
+	return nexthdr;
+}
+
+/**
+ * ip6_reasm_adjust_timer - adjust IPv6 reassembly timer under memory pressure
+ * @fq: fragment queue
+ * @skb: current fragment skb
+ * @nhoff: offset to fragment header
+ *
+ * Shortens reassembly timeout on buffer starvation to
+ * allow faster eviction of incomplete fragment queues.
+ */
+void ip6_reasm_adjust_timer(struct frag_queue *fq,
+			    struct sk_buff *skb, int nhoff)
+{
+	u8 l4proto;
+	unsigned long new_timer;
+
+	if (frag_mem_limit(fq->q.fqdir) < fq->q.fqdir->low_thresh)
+		return;
+
+	l4proto = ip6_reasm_get_l4proto(skb, nhoff);
+
+	if (l4proto == IPPROTO_TCP || l4proto == IPPROTO_ESP)
+		new_timer = fq->q.fqdir->timeout_failed_tcp;
+	else
+		new_timer = fq->q.fqdir->timeout_failed_udp;
+
+	if (time_after(fq->q.timer.expires, jiffies + new_timer))
+		mod_timer(&fq->q.timer, jiffies + new_timer);
+}
+EXPORT_SYMBOL_GPL(ip6_reasm_adjust_timer);
+#endif
+
 static int ip6_frag_queue(struct net *net,
 			  struct frag_queue *fq, struct sk_buff *skb,
 			  struct frag_hdr *fhdr, int nhoff,
@@ -154,6 +208,10 @@ static int ip6_frag_queue(struct net *net,
 			goto discard_fq;
 		fq->q.flags |= INET_FRAG_LAST_IN;
 		fq->q.len = end;
+
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+		ip6_reasm_adjust_timer(fq, skb, nhoff);
+#endif
 	} else {
 		/* Check if the fragment is rounded to 8 bytes.
 		 * Required by the RFC.
@@ -437,6 +495,20 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+	{
+		.procname	= "ip6frag_timeout_failed_tcp",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "ip6frag_timeout_failed_udp",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+#endif
 };
 
 /* secret interval has been deprecated */
@@ -468,6 +540,10 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
 	table[1].data	= &net->ipv6.fqdir->low_thresh;
 	table[1].extra2	= &net->ipv6.fqdir->high_thresh;
 	table[2].data	= &net->ipv6.fqdir->timeout;
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+	table[3].data	= &net->ipv6.fqdir->timeout_failed_tcp;
+	table[4].data	= &net->ipv6.fqdir->timeout_failed_udp;
+#endif
 
 	hdr = register_net_sysctl_sz(net, "net/ipv6", table,
 				     ARRAY_SIZE(ip6_frags_ns_ctl_table));
@@ -538,6 +614,12 @@ static int __net_init ipv6_frags_init_net(struct net *net)
 	net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
 	net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
 	net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT;
+#ifdef CONFIG_IPV6_FRAG_TIMER_ADJ
+	net->ipv6.fqdir->timeout_failed_tcp =
+		IPV6_REASM_TIMEOUT_FAILED_TCP * HZ;
+	net->ipv6.fqdir->timeout_failed_udp =
+		IPV6_REASM_TIMEOUT_FAILED_UDP * HZ;
+#endif
 
 	res = ip6_frags_ns_sysctl_register(net);
 	if (res < 0)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [net-next,v3] ipv6: shorten reassembly timeout under fragment memory pressure
  2026-02-11 10:32 ` [net-next,v3] ipv6: shorten reassembly timeout under fragment memory pressure 배석진
@ 2026-02-11 15:10   ` Fernando Fernandez Mancera
       [not found]     ` <CGME20260211030048epcms1p54c6ed78458f57def8e3163032498ca00@epcms1p7>
  2026-02-11 15:36     ` [net-next,v3] " Eric Dumazet
  0 siblings, 2 replies; 7+ messages in thread
From: Fernando Fernandez Mancera @ 2026-02-11 15:10 UTC (permalink / raw)
  To: soukjin.bae, netdev@vger.kernel.org
  Cc: netfilter-devel@vger.kernel.org, davem@davemloft.net,
	edumazet@google.com, pabeni@redhat.com, dsahern@kernel.org,
	kuba@kernel.org, horms@kernel.org, phil@nwl.cc,
	coreteam@netfilter.org, fw@strlen.de, pablo@netfilter.org

On 2/11/26 11:32 AM, 배석진 wrote:
>   Changes in v3:
> - Fix build bot error and warnings
> - baseline update
> 
> 
> 
>  From c7940e3dd728fdc58c8199bc031bf3f8f1e8a20f Mon Sep 17 00:00:00 2001
> From: Soukjin Bae <soukjin.bae@samsung.com>
> Date: Wed, 11 Feb 2026 11:20:23 +0900
> Subject: [PATCH] ipv6: shorten reassembly timeout under fragment memory
>   pressure
> 
> Under heavy IPv6 fragmentation, incomplete fragment queues may persist
> for the full reassembly timeout even when fragment memory is under
> pressure.
> 
> This can lead to prolonged retention of fragment queues that are unlikely
> to complete, causing newly arriving fragmented packets to be dropped due
> to memory exhaustion.
> 
> Introduce an optional mechanism to shorten the IPv6 reassembly timeout
> when fragment memory usage exceeds the low threshold. Different timeout
> values are applied depending on the upper-layer protocol to balance
> eviction speed and completion probability.
> 
> Signed-off-by: Soukjin Bae <soukjin.bae@samsung.com>

Hello,

isn't this what net.ipv6.ip6frag_time does? In addition, the situation 
you described could be overcome by increasing the memory thresholds at 
net.ipv6.ip6frag_low_thresh and net.ipv6.ip6frag_high_thresh.

Please, let me know if I am missing something.

Thanks,
Fernando.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [net-next,v3] ipv6: shorten reassembly timeout under fragment memory pressure
  2026-02-11 15:10   ` Fernando Fernandez Mancera
       [not found]     ` <CGME20260211030048epcms1p54c6ed78458f57def8e3163032498ca00@epcms1p7>
@ 2026-02-11 15:36     ` Eric Dumazet
  2026-02-12  0:22       ` 배석진
  1 sibling, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2026-02-11 15:36 UTC (permalink / raw)
  To: Fernando Fernandez Mancera
  Cc: soukjin.bae, netdev@vger.kernel.org,
	netfilter-devel@vger.kernel.org, davem@davemloft.net,
	pabeni@redhat.com, dsahern@kernel.org, kuba@kernel.org,
	horms@kernel.org, phil@nwl.cc, coreteam@netfilter.org,
	fw@strlen.de, pablo@netfilter.org

On Wed, Feb 11, 2026 at 4:11 PM Fernando Fernandez Mancera
<fmancera@suse.de> wrote:
>
> On 2/11/26 11:32 AM, 배석진 wrote:
> >   Changes in v3:
> > - Fix build bot error and warnings
> > - baseline update
> >
> >
> >
> >  From c7940e3dd728fdc58c8199bc031bf3f8f1e8a20f Mon Sep 17 00:00:00 2001
> > From: Soukjin Bae <soukjin.bae@samsung.com>
> > Date: Wed, 11 Feb 2026 11:20:23 +0900
> > Subject: [PATCH] ipv6: shorten reassembly timeout under fragment memory
> >   pressure
> >
> > Under heavy IPv6 fragmentation, incomplete fragment queues may persist
> > for the full reassembly timeout even when fragment memory is under
> > pressure.
> >
> > This can lead to prolonged retention of fragment queues that are unlikely
> > to complete, causing newly arriving fragmented packets to be dropped due
> > to memory exhaustion.
> >
> > Introduce an optional mechanism to shorten the IPv6 reassembly timeout
> > when fragment memory usage exceeds the low threshold. Different timeout
> > values are applied depending on the upper-layer protocol to balance
> > eviction speed and completion probability.
> >
> > Signed-off-by: Soukjin Bae <soukjin.bae@samsung.com>
>
> Hello,
>
> isn't this what net.ipv6.ip6frag_time does? In addition, the situation
> you described could be overcome by increasing the memory thresholds at
> net.ipv6.ip6frag_low_thresh and net.ipv6.ip6frag_high_thresh.
>
> Please, let me know if I am missing something.

Also :

1) net-next is closed.
Please read Documentation/process/maintainer-netdev.rst

2) We do not send 3 versions of a patch in the same day.
Please read Documentation/process/maintainer-netdev.rst

3) What about IPv4 ?

4) Only the first fragment contains the 'protocol of the whole
datagram', and fragments can be received in any order.

5) We do not add a MAINTAINER entry for such a patch, sorry.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE:(2) [net-next,v3] ipv6: shorten reassembly timeout under fragment memory pressure
  2026-02-11 15:36     ` [net-next,v3] " Eric Dumazet
@ 2026-02-12  0:22       ` 배석진
  2026-02-12  7:16         ` (2) " Eric Dumazet
  0 siblings, 1 reply; 7+ messages in thread
From: 배석진 @ 2026-02-12  0:22 UTC (permalink / raw)
  To: Eric Dumazet, Fernando Fernandez Mancera
  Cc: netdev@vger.kernel.org, netfilter-devel@vger.kernel.org,
	davem@davemloft.net, pabeni@redhat.com, dsahern@kernel.org,
	kuba@kernel.org, horms@kernel.org, phil@nwl.cc,
	coreteam@netfilter.org, fw@strlen.de, pablo@netfilter.org

>On Wed, Feb 11, 2026 at 4:11 PM Fernando Fernandez Mancera
><fmancera@suse.de> wrote:
>>
>> On 2/11/26 11:32 AM, 배석진 wrote:
>> >   Changes in v3:
>> > - Fix build bot error and warnings
>> > - baseline update
>> >
>> >
>> >
>> >  From c7940e3dd728fdc58c8199bc031bf3f8f1e8a20f Mon Sep 17 00:00:00 2001
>> > From: Soukjin Bae <soukjin.bae@samsung.com>
>> > Date: Wed, 11 Feb 2026 11:20:23 +0900
>> > Subject: [PATCH] ipv6: shorten reassembly timeout under fragment memory
>> >   pressure
>> >
>> > Under heavy IPv6 fragmentation, incomplete fragment queues may persist
>> > for the full reassembly timeout even when fragment memory is under
>> > pressure.
>> >
>> > This can lead to prolonged retention of fragment queues that are unlikely
>> > to complete, causing newly arriving fragmented packets to be dropped due
>> > to memory exhaustion.
>> >
>> > Introduce an optional mechanism to shorten the IPv6 reassembly timeout
>> > when fragment memory usage exceeds the low threshold. Different timeout
>> > values are applied depending on the upper-layer protocol to balance
>> > eviction speed and completion probability.
>> >
>> > Signed-off-by: Soukjin Bae <soukjin.bae@samsung.com>
>>
>> Hello,
>>
>> isn't this what net.ipv6.ip6frag_time does? In addition, the situation
>> you described could be overcome by increasing the memory thresholds at
>> net.ipv6.ip6frag_low_thresh and net.ipv6.ip6frag_high_thresh.
>>
>> Please, let me know if I am missing something.
>
>Also :
>
>1) net-next is closed.
>Please read Documentation/process/maintainer-netdev.rst
>
>2) We do not send 3 versions of a patch in the same day.
>Please read Documentation/process/maintainer-netdev.rst
>
>3) What about IPv4 ?
>
>4) Only the first fragment contains the 'protocol of the whole
>datagram', and fragments can be received in any order.
>
>5) We do not add a MAINTAINER entry for such a patch, sorry.


Hello,


Regarding net.ipv6.ip6frag_time and low/high_thresh:

The issue we are addressing currently occurs due to a large volume of mDNS
traffic from WiFi APs. As a temporary measure, we increased the high_thresh
value to accommodate the traffic.

However, UDP traffic such as mDNS cannot recover once a fragment stream is
lost, leading to wasted memory. Therefore, this patch is intended to make
more efficient use of the currently allocated fragment memory by shortening
the reassembly timeout under memory pressure.

Also, we tend to avoid changing the global value of ip6frag_time to
preserve existing behavior. This is why I also added a new config option.


Regarding others:

1, 2) net-next is closed and multiple patch versions
I apologize for the oversight regarding the net-next status and the frequent
submissions. I was trying to fix CI build failures. I will follow the
documented guidance going forward.

3) What about IPv4?
The issue was primarily observed in IPv6-dominant IMS environments, which
was the initial focus. However, I agree that the same memory management logic
is beneficial for IPv4. I will include IPv4 support in the next version to
provide a unified solution.

4) Only the first fragment contains the 'protocol of the whole datagram'.
You are correct. I will update the logic to store the L4 protocol information
once the first fragment is received, and only then apply the adjusted timeout.
If the first fragment is lost, the adjusted timer will not be triggered, but
this is acceptable as a partial case.

5) MAINTAINERS entry
Understood. I will remove the MAINTAINERS entry. It was added to address a
checkpatch.pl warning and a CI build failure.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: (2) [net-next,v3] ipv6: shorten reassembly timeout under fragment memory pressure
  2026-02-12  0:22       ` 배석진
@ 2026-02-12  7:16         ` Eric Dumazet
  2026-02-12  8:15           ` 배석진
  0 siblings, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2026-02-12  7:16 UTC (permalink / raw)
  To: soukjin.bae
  Cc: Fernando Fernandez Mancera, netdev@vger.kernel.org,
	netfilter-devel@vger.kernel.org, davem@davemloft.net,
	pabeni@redhat.com, dsahern@kernel.org, kuba@kernel.org,
	horms@kernel.org, phil@nwl.cc, coreteam@netfilter.org,
	fw@strlen.de, pablo@netfilter.org

On Thu, Feb 12, 2026 at 1:22 AM 배석진 <soukjin.bae@samsung.com> wrote:
>
> >On Wed, Feb 11, 2026 at 4:11 PM Fernando Fernandez Mancera
> ><fmancera@suse.de> wrote:
> >>
> >> On 2/11/26 11:32 AM, 배석진 wrote:
> >> >   Changes in v3:
> >> > - Fix build bot error and warnings
> >> > - baseline update
> >> >
> >> >
> >> >
> >> >  From c7940e3dd728fdc58c8199bc031bf3f8f1e8a20f Mon Sep 17 00:00:00 2001
> >> > From: Soukjin Bae <soukjin.bae@samsung.com>
> >> > Date: Wed, 11 Feb 2026 11:20:23 +0900
> >> > Subject: [PATCH] ipv6: shorten reassembly timeout under fragment memory
> >> >   pressure
> >> >
> >> > Under heavy IPv6 fragmentation, incomplete fragment queues may persist
> >> > for the full reassembly timeout even when fragment memory is under
> >> > pressure.
> >> >
> >> > This can lead to prolonged retention of fragment queues that are unlikely
> >> > to complete, causing newly arriving fragmented packets to be dropped due
> >> > to memory exhaustion.
> >> >
> >> > Introduce an optional mechanism to shorten the IPv6 reassembly timeout
> >> > when fragment memory usage exceeds the low threshold. Different timeout
> >> > values are applied depending on the upper-layer protocol to balance
> >> > eviction speed and completion probability.
> >> >
> >> > Signed-off-by: Soukjin Bae <soukjin.bae@samsung.com>
> >>
> >> Hello,
> >>
> >> isn't this what net.ipv6.ip6frag_time does? In addition, the situation
> >> you described could be overcome by increasing the memory thresholds at
> >> net.ipv6.ip6frag_low_thresh and net.ipv6.ip6frag_high_thresh.
> >>
> >> Please, let me know if I am missing something.
> >
> >Also :
> >
> >1) net-next is closed.
> >Please read Documentation/process/maintainer-netdev.rst
> >
> >2) We do not send 3 versions of a patch in the same day.
> >Please read Documentation/process/maintainer-netdev.rst
> >
> >3) What about IPv4 ?
> >
> >4) Only the first fragment contains the 'protocol of the whole
> >datagram', and fragments can be received in any order.
> >
> >5) We do not add a MAINTAINER entry for such a patch, sorry.
>
>
> Hello,
>
>
> Regarding about net.ipv6.ip6frag_time and low/high_thresh:
>
> The issue we are addressing currently occurs due to a large volume of mDNS
> traffic from WiFi APs. As a temporary measure, we increased the high_thresh
> value to accommodate the traffic.
>
> However, UDP traffic such as mDNS cannot recover once a fragment stream is
> lost, leading to wasted memory. Therefore, this patch is intended to make
> more efficient use of the currently allocated fragment memory by shortening
> the reassembly timeout under memory pressure.
>
> Also, we tend to avoid changing the global value of ip6frag_time to
> preserve existing behavior. This is why I added new config too.
>
>
> Regarding others:
>
> 1, 2) net-next is closed and multiple patch
> I apologize for the oversight regarding the net-next status and the frequent
> submissions. I was tried to fix CI build failures. I will follow the
> documented guidance going forward.
>
> 3) What about IPv4?
> The issue was primarily observed in IPv6-dominant IMS environments, which
> was the initial focus. However, I agree that the same memory management logic
> is beneficial for IPv4. I will include IPv4 support in the next version to
> provide a unified solution.
>
> 4) Only the first fragment contains the 'protocol of the whole datagram'.
> You are correct. I will update the logic to store the L4 protocol information
> once the first fragment is received, and only then apply the adjusted timeout.
> If the first fragment is lost, the adjusted timer will not be triggered, but
> this is acceptable as a partial case.

I do not think we will accept a patch trying to 'fix' reassembly. This
is fundamentally not fixable.

I can tell you that in crowded wifi environments, I have seen delays
of 30 seconds (and more) to complete
a datagram of only 2 fragments. Your default settings are not viable.

If you depend on receiving fragments, change the existing tunables.
Instead of :

/proc/sys/net/ipv6/ip6frag_high_thresh:4194304  (4 MB)
/proc/sys/net/ipv6/ip6frag_low_thresh:3145728 (3 MB)
/proc/sys/net/ipv6/ip6frag_time:60

Use:

/proc/sys/net/ipv6/ip6frag_high_thresh:104857600  (100 MB)
/proc/sys/net/ipv6/ip6frag_low_thresh:78643200 (75 MB)
/proc/sys/net/ipv6/ip6frag_time:60

Of course, avoiding fragments is the right solution.

https://datatracker.ietf.org/doc/html/rfc6762#section-17

TCP should avoid using frags by default.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE:(4) [net-next,v3] ipv6: shorten reassembly timeout under fragment memory pressure
  2026-02-12  7:16         ` (2) " Eric Dumazet
@ 2026-02-12  8:15           ` 배석진
  0 siblings, 0 replies; 7+ messages in thread
From: 배석진 @ 2026-02-12  8:15 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Fernando Fernandez Mancera, netdev@vger.kernel.org,
	netfilter-devel@vger.kernel.org, davem@davemloft.net,
	pabeni@redhat.com, dsahern@kernel.org, kuba@kernel.org,
	horms@kernel.org, phil@nwl.cc, coreteam@netfilter.org,
	fw@strlen.de, pablo@netfilter.org

Hello,

Thank you for the detailed explanation and I understand your concerns.
Given your feedback, we will reconsider this approach and rely on
the existing sysctl tunables instead.

Thank you for the guidance.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2026-02-12  8:15 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CGME20260211030048epcms1p54c6ed78458f57def8e3163032498ca00@epcms1p2>
2026-02-11 10:32 ` [net-next,v3] ipv6: shorten reassembly timeout under fragment memory pressure 배석진
2026-02-11 15:10   ` Fernando Fernandez Mancera
     [not found]     ` <CGME20260211030048epcms1p54c6ed78458f57def8e3163032498ca00@epcms1p7>
2026-02-11 10:19       ` [net,v3] " 배석진
2026-02-11 15:36     ` [net-next,v3] " Eric Dumazet
2026-02-12  0:22       ` 배석진
2026-02-12  7:16         ` (2) " Eric Dumazet
2026-02-12  8:15           ` 배석진

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox