public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH net v4] ipv6: flowlabel: enforce per-netns limit for unprivileged callers
@ 2026-05-01  7:41 Maoyi Xie
  2026-05-01 14:09 ` Willem de Bruijn
  0 siblings, 1 reply; 2+ messages in thread
From: Maoyi Xie @ 2026-05-01  7:41 UTC (permalink / raw)
  To: netdev
  Cc: willemdebruijn.kernel, willemb, edumazet, pabeni, kuba, davem,
	dsahern, kuznet, linux-kernel, stable

fl_size, fl_ht and ip6_fl_lock in net/ipv6/ip6_flowlabel.c are file
scope and shared across netns. mem_check() reads fl_size to decide
whether to deny non-CAP_NET_ADMIN callers; capable() runs against
init_user_ns, so an unprivileged user in any non-init userns can
push fl_size past FL_MAX_SIZE - FL_MAX_SIZE/4 and starve every
other unprivileged userns on the host.

Add struct netns_ipv6::flowlabel_count, bumped and decremented next
to fl_size in fl_intern, ip6_fl_gc and ip6_fl_purge. The new field
is placed in the existing 4-byte hole after ipmr_seq, so struct
netns_ipv6 stays the same size on 64-bit builds.

mem_check() folds an extra FL_MAX_SIZE/8 ceiling into the existing
non-CAP_NET_ADMIN conditional.

Bump FL_MAX_SIZE from 4096 to 8192. It has been 4096 since the file
was added; machines and connection counts have grown. The new
per-netns ceiling is then 1024 flowlabels, half of FL_MAX_SIZE/4.

CAP_NET_ADMIN against init_user_ns still bypasses both caps.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Suggested-by: Willem de Bruijn <willemb@google.com>
Cc: stable@vger.kernel.org # v5.15+
Signed-off-by: Maoyi Xie <maoyi.xie@ntu.edu.sg>
---
v4 (this submission, addressing v3 review by Willem):
    - rephrased the flowlabel_count placement note: dropped the
      flowlabel_has_excl cacheline argument; replaced with the
      simpler "fills the existing 4-byte hole after ipmr_seq" fact.
    - reordered atomic_dec(&...flowlabel_count) to sit immediately
      after atomic_dec(&fl_size) in ip6_fl_gc and ip6_fl_purge so
      the pairing is visually obvious. Both decs now happen before
      fl_free(fl) since fl_free invalidates fl->fl_net. fl_intern
      was already in this order.
v3: addressed Willem's review on the private security@ thread;
    merged FL_MAX_SIZE doubling, dropped test data, moved
    flowlabel_count near ipmr_seq, inlined fl->fl_net in ip6_fl_gc.
v2: per-netns counter + cap, sent to security@ as a 2-patch series.
v1: fix-shape sketch in original disclosure.

 include/net/netns/ipv6.h |  1 +
 net/ipv6/ip6_flowlabel.c | 14 ++++++++++----
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 34bdb1308..329482373 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -119,6 +119,7 @@ struct netns_ipv6 {
 	struct fib_notifier_ops	*notifier_ops;
 	struct fib_notifier_ops	*ip6mr_notifier_ops;
 	unsigned int ipmr_seq; /* protected by rtnl_mutex */
+	atomic_t		flowlabel_count;
 	struct {
 		struct hlist_head head;
 		spinlock_t	lock;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index c92f98c6f..360109cad 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -36,7 +36,7 @@
 /* FL hash table */
 
 #define FL_MAX_PER_SOCK	32
-#define FL_MAX_SIZE	4096
+#define FL_MAX_SIZE	8192
 #define FL_HASH_MASK	255
 #define FL_HASH(l)	(ntohl(l)&FL_HASH_MASK)
 
@@ -162,8 +162,9 @@ static void ip6_fl_gc(struct timer_list *unused)
 				ttd = fl->expires;
 				if (time_after_eq(now, ttd)) {
 					*flp = fl->next;
-					fl_free(fl);
 					atomic_dec(&fl_size);
+					atomic_dec(&fl->fl_net->ipv6.flowlabel_count);
+					fl_free(fl);
 					continue;
 				}
 				if (!sched || time_before(ttd, sched))
@@ -195,8 +196,9 @@ static void __net_exit ip6_fl_purge(struct net *net)
 			if (net_eq(fl->fl_net, net) &&
 			    atomic_read(&fl->users) == 0) {
 				*flp = fl->next;
-				fl_free(fl);
 				atomic_dec(&fl_size);
+				atomic_dec(&net->ipv6.flowlabel_count);
+				fl_free(fl);
 				continue;
 			}
 			flp = &fl->next;
@@ -245,6 +247,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
 	fl->next = fl_ht[FL_HASH(fl->label)];
 	rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
 	atomic_inc(&fl_size);
+	atomic_inc(&net->ipv6.flowlabel_count);
 	spin_unlock_bh(&ip6_fl_lock);
 	rcu_read_unlock();
 	return NULL;
@@ -464,6 +467,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
 
 static int mem_check(struct sock *sk)
 {
+	struct net *net = sock_net(sk);
 	int room = FL_MAX_SIZE - atomic_read(&fl_size);
 	struct ipv6_fl_socklist *sfl;
 	int count = 0;
@@ -478,7 +482,9 @@ static int mem_check(struct sock *sk)
 
 	if (room <= 0 ||
 	    ((count >= FL_MAX_PER_SOCK ||
-	      (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
+	      (count > 0 && room < FL_MAX_SIZE/2) ||
+	      room < FL_MAX_SIZE/4 ||
+	      atomic_read(&net->ipv6.flowlabel_count) >= FL_MAX_SIZE/8) &&
 	     !capable(CAP_NET_ADMIN)))
 		return -ENOBUFS;
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH net v4] ipv6: flowlabel: enforce per-netns limit for unprivileged callers
  2026-05-01  7:41 [PATCH net v4] ipv6: flowlabel: enforce per-netns limit for unprivileged callers Maoyi Xie
@ 2026-05-01 14:09 ` Willem de Bruijn
  0 siblings, 0 replies; 2+ messages in thread
From: Willem de Bruijn @ 2026-05-01 14:09 UTC (permalink / raw)
  To: Maoyi Xie, netdev
  Cc: willemdebruijn.kernel, willemb, edumazet, pabeni, kuba, davem,
	dsahern, kuznet, linux-kernel, stable

Maoyi Xie wrote:
> fl_size, fl_ht and ip6_fl_lock in net/ipv6/ip6_flowlabel.c are file
> scope and shared across netns. mem_check() reads fl_size to decide
> whether to deny non-CAP_NET_ADMIN callers; capable() runs against
> init_user_ns, so an unprivileged user in any non-init userns can
> push fl_size past FL_MAX_SIZE - FL_MAX_SIZE/4 and starve every
> other unprivileged userns on the host.

So previously a single unprivileged user could get 4K - 1K == 3K
entries.

Now it can only get 1K entries even after doubling FL_MAX_SIZE.
The goal of doubling that was to avoid reducing the per-user
limit.

With the expanded limit, unprivileged users collectively can fill 6K
entries. Should the check become that each individual user can only
fill half of this? That would keep the original 3K per-user limit:

const int unpriv_total_limit = FL_MAX_SIZE - (FL_MAX_SIZE / 4);
const int unpriv_user_limit = unpriv_total_limit / 2;

   	if (room <= 0 ||
   	    ((count >= FL_MAX_PER_SOCK ||
  -	      (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
  +	      (count > 0 && room < FL_MAX_SIZE/2) ||
  +	      room < FL_MAX_SIZE/4 ||
  +	      atomic_read(&net->ipv6.flowlabel_count) >= unpriv_user_limit) &&
   	     !capable(CAP_NET_ADMIN)))

Sorry for not catching this sooner.

> 
> Add struct netns_ipv6::flowlabel_count, bumped and decremented next
> to fl_size in fl_intern, ip6_fl_gc and ip6_fl_purge. The new field
> is placed in the existing 4-byte hole after ipmr_seq, so struct
> netns_ipv6 stays the same size on 64-bit builds.
> 
> mem_check() folds an extra FL_MAX_SIZE/8 ceiling into the existing
> non-CAP_NET_ADMIN conditional.
> 
> Bump FL_MAX_SIZE from 4096 to 8192. It has been 4096 since the file
> was added; machines and connection counts have grown. The new
> per-netns ceiling is then 1024 flowlabels, half of FL_MAX_SIZE/4.
> 
> CAP_NET_ADMIN against init_user_ns still bypasses both caps.
> 
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Suggested-by: Willem de Bruijn <willemb@google.com>
> Cc: stable@vger.kernel.org # v5.15+
> Signed-off-by: Maoyi Xie <maoyi.xie@ntu.edu.sg>
> ---
> v4 (this submission, addressing v3 review by Willem):
>     - rephrased the flowlabel_count placement note: dropped the
>       flowlabel_has_excl cacheline argument; replaced with the
>       simpler "fills the existing 4-byte hole after ipmr_seq" fact.
>     - reordered atomic_dec(&...flowlabel_count) to sit immediately
>       after atomic_dec(&fl_size) in ip6_fl_gc and ip6_fl_purge so
>       the pairing is visually obvious. Both decs now happen before
>       fl_free(fl) since fl_free invalidates fl->fl_net. fl_intern
>       was already in this order.
> v3: addressed Willem's review on the private security@ thread;
>     merged FL_MAX_SIZE doubling, dropped test data, moved
>     flowlabel_count near ipmr_seq, inlined fl->fl_net in ip6_fl_gc.
> v2: per-netns counter + cap, sent to security@ as a 2-patch series.
> v1: fix-shape sketch in original disclosure.
> 
>  include/net/netns/ipv6.h |  1 +
>  net/ipv6/ip6_flowlabel.c | 14 ++++++++++----
>  2 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
> index 34bdb1308..329482373 100644
> --- a/include/net/netns/ipv6.h
> +++ b/include/net/netns/ipv6.h
> @@ -119,6 +119,7 @@ struct netns_ipv6 {
>  	struct fib_notifier_ops	*notifier_ops;
>  	struct fib_notifier_ops	*ip6mr_notifier_ops;
>  	unsigned int ipmr_seq; /* protected by rtnl_mutex */
> +	atomic_t		flowlabel_count;
>  	struct {
>  		struct hlist_head head;
>  		spinlock_t	lock;
> diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
> index c92f98c6f..360109cad 100644
> --- a/net/ipv6/ip6_flowlabel.c
> +++ b/net/ipv6/ip6_flowlabel.c
> @@ -36,7 +36,7 @@
>  /* FL hash table */
>  
>  #define FL_MAX_PER_SOCK	32
> -#define FL_MAX_SIZE	4096
> +#define FL_MAX_SIZE	8192
>  #define FL_HASH_MASK	255
>  #define FL_HASH(l)	(ntohl(l)&FL_HASH_MASK)
>  
> @@ -162,8 +162,9 @@ static void ip6_fl_gc(struct timer_list *unused)
>  				ttd = fl->expires;
>  				if (time_after_eq(now, ttd)) {
>  					*flp = fl->next;
> -					fl_free(fl);
>  					atomic_dec(&fl_size);
> +					atomic_dec(&fl->fl_net->ipv6.flowlabel_count);
> +					fl_free(fl);
>  					continue;
>  				}
>  				if (!sched || time_before(ttd, sched))
> @@ -195,8 +196,9 @@ static void __net_exit ip6_fl_purge(struct net *net)
>  			if (net_eq(fl->fl_net, net) &&
>  			    atomic_read(&fl->users) == 0) {
>  				*flp = fl->next;
> -				fl_free(fl);
>  				atomic_dec(&fl_size);
> +				atomic_dec(&net->ipv6.flowlabel_count);
> +				fl_free(fl);
>  				continue;
>  			}
>  			flp = &fl->next;
> @@ -245,6 +247,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
>  	fl->next = fl_ht[FL_HASH(fl->label)];
>  	rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
>  	atomic_inc(&fl_size);
> +	atomic_inc(&net->ipv6.flowlabel_count);
>  	spin_unlock_bh(&ip6_fl_lock);
>  	rcu_read_unlock();
>  	return NULL;
> @@ -464,6 +467,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
>  
>  static int mem_check(struct sock *sk)
>  {
> +	struct net *net = sock_net(sk);
>  	int room = FL_MAX_SIZE - atomic_read(&fl_size);
>  	struct ipv6_fl_socklist *sfl;
>  	int count = 0;
> @@ -478,7 +482,9 @@ static int mem_check(struct sock *sk)
>  
>  	if (room <= 0 ||
>  	    ((count >= FL_MAX_PER_SOCK ||
> -	      (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
> +	      (count > 0 && room < FL_MAX_SIZE/2) ||
> +	      room < FL_MAX_SIZE/4 ||
> +	      atomic_read(&net->ipv6.flowlabel_count) >= FL_MAX_SIZE/8) &&
>  	     !capable(CAP_NET_ADMIN)))
>  		return -ENOBUFS;
>  
> -- 
> 2.34.1
> 



^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-05-01 14:09 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-05-01  7:41 [PATCH net v4] ipv6: flowlabel: enforce per-netns limit for unprivileged callers Maoyi Xie
2026-05-01 14:09 ` Willem de Bruijn

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox