Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 0/2] net/sched: refine fq_codel memory limits
@ 2026-05-12  9:48 Eric Dumazet
  2026-05-12  9:48 ` [PATCH net-next 1/2] net: make is_skb_wmem() available to modules Eric Dumazet
  2026-05-12  9:48 ` [PATCH net-next 2/2] net/sched: fq_codel: local packets no longer count against memory limit Eric Dumazet
  0 siblings, 2 replies; 7+ messages in thread
From: Eric Dumazet @ 2026-05-12  9:48 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko,
	Toke Høiland-Jørgensen, netdev, eric.dumazet,
	Eric Dumazet

Packets that are associated with local sockets sk_wmem_alloc
do not really need additional memory control.

First patch makes is_skb_wmem() available to modules.

Second patch uses is_skb_wmem() in fq_codel.

Eric Dumazet (2):
  net: make is_skb_wmem() available to modules
  net/sched: fq_codel: local packets no longer count against memory
    limit

 include/net/sock.h         | 12 +++++++++++-
 include/net/tcp.h          |  1 -
 net/core/skbuff.c          |  2 +-
 net/core/sock.c            |  3 +++
 net/core/sock_destructor.h | 12 ------------
 net/ipv4/inet_fragment.c   |  2 --
 net/ipv4/tcp_output.c      |  1 +
 net/sched/sch_fq_codel.c   |  2 +-
 8 files changed, 17 insertions(+), 18 deletions(-)
 delete mode 100644 net/core/sock_destructor.h

-- 
2.54.0.563.g4f69b47b94-goog


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH net-next 1/2] net: make is_skb_wmem() available to modules
  2026-05-12  9:48 [PATCH net-next 0/2] net/sched: refine fq_codel memory limits Eric Dumazet
@ 2026-05-12  9:48 ` Eric Dumazet
  2026-05-12 12:06   ` Toke Høiland-Jørgensen
  2026-05-12  9:48 ` [PATCH net-next 2/2] net/sched: fq_codel: local packets no longer count against memory limit Eric Dumazet
  1 sibling, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2026-05-12  9:48 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko,
	Toke Høiland-Jørgensen, netdev, eric.dumazet,
	Eric Dumazet

Following patch will use is_skb_wmem() from fq_codel.

Provide __sock_wfree() only if CONFIG_INET=y

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/sock.h         | 12 +++++++++++-
 include/net/tcp.h          |  1 -
 net/core/skbuff.c          |  2 +-
 net/core/sock.c            |  3 +++
 net/core/sock_destructor.h | 12 ------------
 net/ipv4/inet_fragment.c   |  2 --
 net/ipv4/tcp_output.c      |  1 +
 7 files changed, 16 insertions(+), 17 deletions(-)
 delete mode 100644 net/core/sock_destructor.h

diff --git a/include/net/sock.h b/include/net/sock.h
index dccd3738c3687056b67c8de44fce9842dcc365ec..219917959b54582fdd3d78e8042e1bdfc43decd0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1850,8 +1850,18 @@ static inline struct sock *sk_clone_lock(const struct sock *sk, const gfp_t prio
 
 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 			     gfp_t priority);
-void __sock_wfree(struct sk_buff *skb);
 void sock_wfree(struct sk_buff *skb);
+#ifdef CONFIG_INET
+void __sock_wfree(struct sk_buff *skb);
+void tcp_wfree(struct sk_buff *skb);
+#endif
+static inline bool is_skb_wmem(const struct sk_buff *skb)
+{
+	return skb->destructor == sock_wfree ||
+	       (IS_ENABLED(CONFIG_INET) && skb->destructor == __sock_wfree) ||
+	       (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree);
+}
+
 struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
 			     gfp_t priority);
 void skb_orphan_partial(struct sk_buff *skb);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3c4e6adb0dbdcefdfa6c95d1738e2039dede3f32..5715e60c519a79232a3bed6682aa2c9cf97be22a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -390,7 +390,6 @@ static inline bool tcp_release_cb_cond(struct sock *sk)
 	return false;
 }
 
-void tcp_wfree(struct sk_buff *skb);
 void tcp_write_timer_handler(struct sock *sk);
 void tcp_delack_timer_handler(struct sock *sk);
 int tcp_ioctl(struct sock *sk, int cmd, int *karg);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7dad68e3b5186cf622a3ed5a6e87c09d46bc3fd6..11871836aa88ea1f16414a94628eb50336cd9653 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -78,6 +78,7 @@
 #include <net/mpls.h>
 #include <net/mptcp.h>
 #include <net/mctp.h>
+#include <net/tcp.h>
 #include <net/can.h>
 #include <net/page_pool/helpers.h>
 #include <net/psp/types.h>
@@ -96,7 +97,6 @@
 #include "devmem.h"
 #include "net-sysfs.h"
 #include "netmem_priv.h"
-#include "sock_destructor.h"
 
 #ifdef CONFIG_SKB_EXTENSIONS
 static struct kmem_cache *skbuff_ext_cache __ro_after_init;
diff --git a/net/core/sock.c b/net/core/sock.c
index b37b664b6eb92f375d6708a5a609f35f07ee2897..f362e3ce1efb672bc463c54b7a1b8f05785cba45 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2708,6 +2708,7 @@ EXPORT_SYMBOL(sock_wfree);
 /* This variant of sock_wfree() is used by TCP,
  * since it sets SOCK_USE_WRITE_QUEUE.
  */
+#ifdef CONFIG_INET
 void __sock_wfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
@@ -2715,6 +2716,8 @@ void __sock_wfree(struct sk_buff *skb)
 	if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
 		__sk_free(sk);
 }
+EXPORT_SYMBOL_GPL(__sock_wfree);
+#endif
 
 void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 {
diff --git a/net/core/sock_destructor.h b/net/core/sock_destructor.h
deleted file mode 100644
index 2f396e6bfba5a364727f98b160bf2fd3a3731958..0000000000000000000000000000000000000000
--- a/net/core/sock_destructor.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _NET_CORE_SOCK_DESTRUCTOR_H
-#define _NET_CORE_SOCK_DESTRUCTOR_H
-#include <net/tcp.h>
-
-static inline bool is_skb_wmem(const struct sk_buff *skb)
-{
-	return skb->destructor == sock_wfree ||
-	       skb->destructor == __sock_wfree ||
-	       (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree);
-}
-#endif
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 393770920abd794a47a933f2694afe3e2d08d25a..86b100694659ee51292625216113f9411b98a351 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -24,8 +24,6 @@
 #include <net/ip.h>
 #include <net/ipv6.h>
 
-#include "../core/sock_destructor.h"
-
 /* Use skb->cb to track consecutive/adjacent fragments coming at
  * the end of the queue. Nodes in the rb-tree queue will
  * contain "runs" of one or more adjacent fragments.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 75eb18a2ad4e7ee126806e7a7cbf3d27a452cba5..47647a99e32443a3925a9a160a2da58f530e32e1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1415,6 +1415,7 @@ void tcp_wfree(struct sk_buff *skb)
 out:
 	sk_free(sk);
 }
+EXPORT_SYMBOL_GPL(tcp_wfree);
 
 /* Note: Called under soft irq.
  * We can call TCP stack right away, unless socket is owned by user.
-- 
2.54.0.563.g4f69b47b94-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH net-next 2/2] net/sched: fq_codel: local packets no longer count against memory limit
  2026-05-12  9:48 [PATCH net-next 0/2] net/sched: refine fq_codel memory limits Eric Dumazet
  2026-05-12  9:48 ` [PATCH net-next 1/2] net: make is_skb_wmem() available to modules Eric Dumazet
@ 2026-05-12  9:48 ` Eric Dumazet
  2026-05-12 12:11   ` Toke Høiland-Jørgensen
  1 sibling, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2026-05-12  9:48 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko,
	Toke Høiland-Jørgensen, netdev, eric.dumazet,
	Eric Dumazet

Commit 95b58430abe7 ("fq_codel: add memory limitation per queue")
claimed that the 32 MB default was "reasonable even for heavy duty usages."

In practice, this is not the case.

Packets that are associated with local sockets sk_wmem_alloc
do not really need additional memory control.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_fq_codel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 1b1de693d4c64a1f5f4e9e788371829dea91740e..71107dc52be799a14f370f2ad74d2eadd93992c1 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -212,7 +212,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		q->new_flow_count++;
 		WRITE_ONCE(flow->deficit, q->quantum);
 	}
-	get_codel_cb(skb)->mem_usage = skb->truesize;
+	get_codel_cb(skb)->mem_usage = is_skb_wmem(skb) ? 0 : skb->truesize;
 	q->memory_usage += get_codel_cb(skb)->mem_usage;
 	memory_limited = q->memory_usage > q->memory_limit;
 	qdisc_qlen_inc(sch);
-- 
2.54.0.563.g4f69b47b94-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 1/2] net: make is_skb_wmem() available to modules
  2026-05-12  9:48 ` [PATCH net-next 1/2] net: make is_skb_wmem() available to modules Eric Dumazet
@ 2026-05-12 12:06   ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 7+ messages in thread
From: Toke Høiland-Jørgensen @ 2026-05-12 12:06 UTC (permalink / raw)
  To: Eric Dumazet, David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Eric Dumazet <edumazet@google.com> writes:

> Following patch will use is_skb_wmem() from fq_codel.
>
> Provide __sock_wfree() only if CONFIG_INET=y
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Reviewed-by: Toke Høiland-Jørgensen <toke@toke.dk>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 2/2] net/sched: fq_codel: local packets no longer count against memory limit
  2026-05-12  9:48 ` [PATCH net-next 2/2] net/sched: fq_codel: local packets no longer count against memory limit Eric Dumazet
@ 2026-05-12 12:11   ` Toke Høiland-Jørgensen
  2026-05-12 17:50     ` Eric Dumazet
  0 siblings, 1 reply; 7+ messages in thread
From: Toke Høiland-Jørgensen @ 2026-05-12 12:11 UTC (permalink / raw)
  To: Eric Dumazet, David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Eric Dumazet <edumazet@google.com> writes:

> Commit 95b58430abe7 ("fq_codel: add memory limitation per queue")
> claimed that the 32Mb default was "reasonable even for heavy duty usages."
>
> In practice, this is not the case.

Well, the assumption lasted a decade, so that's pretty good? :)

> Packets that are associated with local sockets sk_wmem_alloc
> do not really need additional memory control.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  net/sched/sch_fq_codel.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
> index 1b1de693d4c64a1f5f4e9e788371829dea91740e..71107dc52be799a14f370f2ad74d2eadd93992c1 100644
> --- a/net/sched/sch_fq_codel.c
> +++ b/net/sched/sch_fq_codel.c
> @@ -212,7 +212,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>  		q->new_flow_count++;
>  		WRITE_ONCE(flow->deficit, q->quantum);
>  	}
> -	get_codel_cb(skb)->mem_usage = skb->truesize;
> +	get_codel_cb(skb)->mem_usage = is_skb_wmem(skb) ? 0 : skb->truesize;
>  	q->memory_usage += get_codel_cb(skb)->mem_usage;

Only one concern here: q->memory_usage is exposed to userspace in the
stats, so this will look like the packets queued are zero-length to
anyone watching, which may end up confusing folks? Also, there will be
no way to see how many bytes are actually in the qdisc.

Should we keep a separate counter so we can still accurately report the
memory usage to userspace?

-Toke

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 2/2] net/sched: fq_codel: local packets no longer count against memory limit
  2026-05-12 12:11   ` Toke Høiland-Jørgensen
@ 2026-05-12 17:50     ` Eric Dumazet
  2026-05-12 18:15       ` Toke Høiland-Jørgensen
  0 siblings, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2026-05-12 17:50 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet

On Tue, May 12, 2026 at 5:11 AM Toke Høiland-Jørgensen <toke@toke.dk> wrote:
>
> Eric Dumazet <edumazet@google.com> writes:
>
> > Commit 95b58430abe7 ("fq_codel: add memory limitation per queue")
> > claimed that the 32Mb default was "reasonable even for heavy duty usages."
> >
> > In practice, this is not the case.
>
> Well, the assumption lasted a decade, so that's pretty good? :)
>
> > Packets that are associated with local sockets sk_wmem_alloc
> > do not really need additional memory control.
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > ---
> >  net/sched/sch_fq_codel.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
> > index 1b1de693d4c64a1f5f4e9e788371829dea91740e..71107dc52be799a14f370f2ad74d2eadd93992c1 100644
> > --- a/net/sched/sch_fq_codel.c
> > +++ b/net/sched/sch_fq_codel.c
> > @@ -212,7 +212,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> >               q->new_flow_count++;
> >               WRITE_ONCE(flow->deficit, q->quantum);
> >       }
> > -     get_codel_cb(skb)->mem_usage = skb->truesize;
> > +     get_codel_cb(skb)->mem_usage = is_skb_wmem(skb) ? 0 : skb->truesize;
> >       q->memory_usage += get_codel_cb(skb)->mem_usage;
>
> Only one concern here: q->memory_usage is exposed to userspace in the
> stats, so this will look like the packets queued are zero-length to
> anyone watching, which may end up confusing folks? Also, there will be
> no way to see how many bytes are actually in the qdisc.

Standard qdisc stats show the amount of bytes and packets.

None of the other qdiscs seem to care (except cake which copy/pasted fq_codel)

>
> Should we keep a separate counter so we can still accurately report the
> memory usage to userspace?

The only purpose of the mem usage counter was to give an idea of how close to
the 32 MB limit we were; this is still reported for forwarded packets
(router workload).

All these counters take precious resources, I would rather not add another one.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 2/2] net/sched: fq_codel: local packets no longer count against memory limit
  2026-05-12 17:50     ` Eric Dumazet
@ 2026-05-12 18:15       ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 7+ messages in thread
From: Toke Høiland-Jørgensen @ 2026-05-12 18:15 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet

Eric Dumazet <edumazet@google.com> writes:

> On Tue, May 12, 2026 at 5:11 AM Toke Høiland-Jørgensen <toke@toke.dk> wrote:
>>
>> Eric Dumazet <edumazet@google.com> writes:
>>
>> > Commit 95b58430abe7 ("fq_codel: add memory limitation per queue")
>> > claimed that the 32Mb default was "reasonable even for heavy duty usages."
>> >
>> > In practice, this is not the case.
>>
>> Well, the assumption lasted a decade, so that's pretty good? :)
>>
>> > Packets that are associated with local sockets sk_wmem_alloc
>> > do not really need additional memory control.
>> >
>> > Signed-off-by: Eric Dumazet <edumazet@google.com>
>> > ---
>> >  net/sched/sch_fq_codel.c | 2 +-
>> >  1 file changed, 1 insertion(+), 1 deletion(-)
>> >
>> > diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
>> > index 1b1de693d4c64a1f5f4e9e788371829dea91740e..71107dc52be799a14f370f2ad74d2eadd93992c1 100644
>> > --- a/net/sched/sch_fq_codel.c
>> > +++ b/net/sched/sch_fq_codel.c
>> > @@ -212,7 +212,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>> >               q->new_flow_count++;
>> >               WRITE_ONCE(flow->deficit, q->quantum);
>> >       }
>> > -     get_codel_cb(skb)->mem_usage = skb->truesize;
>> > +     get_codel_cb(skb)->mem_usage = is_skb_wmem(skb) ? 0 : skb->truesize;
>> >       q->memory_usage += get_codel_cb(skb)->mem_usage;
>>
>> Only one concern here: q->memory_usage is exposed to userspace in the
>> stats, so this will look like the packets queued are zero-length to
>> anyone watching, which may end up confusing folks? Also, there will be
>> no way to see how many bytes are actually in the qdisc.
>
> Standard qdisc stats show the amount of bytes and packets.
>
> None of the other qdiscs seem to care (except cake which copy/pasted fq_codel)

Right, the cake stats were why I asked. Pretty sure there are people who
watch these; but now that you mention it, I may have confused the
standard backlog_bytes stat field with this one...

>> Should we keep a separate counter so we can still accurately report the
>> memory usage to userspace?
>
> Only purpose of mem usage counter was to have an idea of how close of
> the 32Mb limit we were, this is still reported for forwarded packets
> (router workload)
>
> All these counters take precious resources, I would rather not add another one.

Right, fair (and see above). In that case:

Reviewed-by: Toke Høiland-Jørgensen <toke@toke.dk>


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2026-05-12 18:15 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-05-12  9:48 [PATCH net-next 0/2] net/sched: refine fq_codel memory limits Eric Dumazet
2026-05-12  9:48 ` [PATCH net-next 1/2] net: make is_skb_wmem() available to modules Eric Dumazet
2026-05-12 12:06   ` Toke Høiland-Jørgensen
2026-05-12  9:48 ` [PATCH net-next 2/2] net/sched: fq_codel: local packets no longer count against memory limit Eric Dumazet
2026-05-12 12:11   ` Toke Høiland-Jørgensen
2026-05-12 17:50     ` Eric Dumazet
2026-05-12 18:15       ` Toke Høiland-Jørgensen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox