public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH net v2 1/2] 8021q: use RCU for egress QoS mappings
@ 2026-04-20  3:18 Ren Wei
  2026-04-20  3:18 ` [PATCH net v2 2/2] 8021q: delete cleared " Ren Wei
  0 siblings, 1 reply; 3+ messages in thread
From: Ren Wei @ 2026-04-20  3:18 UTC (permalink / raw)
  To: netdev, horms, edumazet
  Cc: andrew+netdev, davem, kuba, pabeni, kees, yuantan098, ylong030,
	n05ec, yifanwucs, tomapufckgml, bird

From: Longxuan Yu <ylong030@ucr.edu>

The TX fast path and reporting paths walk egress QoS mappings without
RTNL. Convert the mapping lists to RCU-protected pointers, use RCU
reader annotations in readers, and defer freeing mapping nodes with an
embedded rcu_head.

This prepares the egress QoS mapping code for safe removal of mapping
nodes in a follow-up change while preserving the current behavior.

Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Longxuan Yu <ylong030@ucr.edu>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
---

changes in v2:
- split the original single patch into RCU preparation and bug-fix patches
- embed an rcu_head in mapping nodes and use kfree_rcu(pm, rcu)
- keep the existing tombstone-skipping check in vlan_fill_info() so this
  patch preserves behavior
- use rcu_dereference_rtnl() for netlink dump traversal
- In patch 1, sample pm->vlan_qos once with READ_ONCE() in vlan_fill_info()
  and reuse the local value, so the RCU preparatory change does not mix a
  plain load for the tombstone check with a separate READ_ONCE() for the
  exported value
- v1 Link:
https://lore.kernel.org/all/b877895cd02d35254b5c05d3c40abbf130cd87eb.1776039122.git.ylong030@ucr.edu/

Separately, we still think the Fixes tag should point to 1da177e4c3f4, not
b020cb488586, because the underlying bug already exists in 1da177e4c3f4:
clearing an egress QoS mapping leaves the mapping node in the hash instead
of deleting it. Commit b020cb488586 only added nr_egress_mappings
accounting for that pre-existing behavior; it did not introduce the bug
itself.

 include/linux/if_vlan.h  | 25 ++++++++++++++++---------
 net/8021q/vlan_dev.c     | 31 ++++++++++++++++---------------
 net/8021q/vlan_netlink.c | 10 ++++++----
 net/8021q/vlanproc.c     | 12 ++++++++----
 4 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index e6272f9c5e42..20cc16ea4e5a 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -147,11 +147,13 @@ extern __be16 vlan_dev_vlan_proto(const struct net_device *dev);
  *	@priority: skb priority
  *	@vlan_qos: vlan priority: (skb->priority << 13) & 0xE000
  *	@next: pointer to next struct
+ *	@rcu: used for deferred freeing of mapping nodes
  */
 struct vlan_priority_tci_mapping {
 	u32					priority;
 	u16					vlan_qos;
-	struct vlan_priority_tci_mapping	*next;
+	struct vlan_priority_tci_mapping __rcu	*next;
+	struct rcu_head			rcu;
 };
 
 struct proc_dir_entry;
@@ -177,7 +179,7 @@ struct vlan_dev_priv {
 	unsigned int				nr_ingress_mappings;
 	u32					ingress_priority_map[8];
 	unsigned int				nr_egress_mappings;
-	struct vlan_priority_tci_mapping	*egress_priority_map[16];
+	struct vlan_priority_tci_mapping __rcu	*egress_priority_map[16];
 
 	__be16					vlan_proto;
 	u16					vlan_id;
@@ -209,19 +211,24 @@ static inline u16
 vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio)
 {
 	struct vlan_priority_tci_mapping *mp;
+	u16 vlan_qos = 0;
 
-	smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */
+	rcu_read_lock();
 
-	mp = vlan_dev_priv(dev)->egress_priority_map[(skprio & 0xF)];
+	mp = rcu_dereference(vlan_dev_priv(dev)->egress_priority_map[skprio & 0xF]);
 	while (mp) {
 		if (mp->priority == skprio) {
-			return mp->vlan_qos; /* This should already be shifted
-					      * to mask correctly with the
-					      * VLAN's TCI */
+			vlan_qos = READ_ONCE(mp->vlan_qos);
+			break;
 		}
-		mp = mp->next;
+		mp = rcu_dereference(mp->next);
 	}
-	return 0;
+	rcu_read_unlock();
+
+	/* This should already be shifted to mask correctly with
+	 * the VLAN's TCI.
+	 */
+	return vlan_qos;
 }
 
 extern bool vlan_do_receive(struct sk_buff **skb);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index c40f7d5c4fca..a5340932b657 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -172,39 +172,34 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 				 u32 skb_prio, u16 vlan_prio)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
-	struct vlan_priority_tci_mapping *mp = NULL;
+	struct vlan_priority_tci_mapping *mp;
 	struct vlan_priority_tci_mapping *np;
+	u32 bucket = skb_prio & 0xF;
 	u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
 
 	/* See if a priority mapping exists.. */
-	mp = vlan->egress_priority_map[skb_prio & 0xF];
+	mp = rtnl_dereference(vlan->egress_priority_map[bucket]);
 	while (mp) {
 		if (mp->priority == skb_prio) {
 			if (mp->vlan_qos && !vlan_qos)
 				vlan->nr_egress_mappings--;
 			else if (!mp->vlan_qos && vlan_qos)
 				vlan->nr_egress_mappings++;
-			mp->vlan_qos = vlan_qos;
+			WRITE_ONCE(mp->vlan_qos, vlan_qos);
 			return 0;
 		}
-		mp = mp->next;
+		mp = rtnl_dereference(mp->next);
 	}
 
 	/* Create a new mapping then. */
-	mp = vlan->egress_priority_map[skb_prio & 0xF];
 	np = kmalloc_obj(struct vlan_priority_tci_mapping);
 	if (!np)
 		return -ENOBUFS;
 
-	np->next = mp;
 	np->priority = skb_prio;
 	np->vlan_qos = vlan_qos;
-	/* Before inserting this element in hash table, make sure all its fields
-	 * are committed to memory.
-	 * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask()
-	 */
-	smp_wmb();
-	vlan->egress_priority_map[skb_prio & 0xF] = np;
+	RCU_INIT_POINTER(np->next, rtnl_dereference(vlan->egress_priority_map[bucket]));
+	rcu_assign_pointer(vlan->egress_priority_map[bucket], np);
 	if (vlan_qos)
 		vlan->nr_egress_mappings++;
 	return 0;
@@ -604,11 +599,17 @@ void vlan_dev_free_egress_priority(const struct net_device *dev)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
-		while ((pm = vlan->egress_priority_map[i]) != NULL) {
-			vlan->egress_priority_map[i] = pm->next;
-			kfree(pm);
+		pm = rtnl_dereference(vlan->egress_priority_map[i]);
+		RCU_INIT_POINTER(vlan->egress_priority_map[i], NULL);
+		while (pm) {
+			struct vlan_priority_tci_mapping *next;
+
+			next = rtnl_dereference(pm->next);
+			kfree_rcu(pm, rcu);
+			pm = next;
 		}
 	}
+	vlan->nr_egress_mappings = 0;
 }
 
 static void vlan_dev_uninit(struct net_device *dev)
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index a000b1ef0520..a5b16833e2ce 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -260,13 +260,15 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 			goto nla_put_failure;
 
 		for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
-			for (pm = vlan->egress_priority_map[i]; pm;
-			     pm = pm->next) {
-				if (!pm->vlan_qos)
+			for (pm = rcu_dereference_rtnl(vlan->egress_priority_map[i]); pm;
+			     pm = rcu_dereference_rtnl(pm->next)) {
+				u16 vlan_qos = READ_ONCE(pm->vlan_qos);
+
+				if (!vlan_qos)
 					continue;
 
 				m.from = pm->priority;
-				m.to   = (pm->vlan_qos >> 13) & 0x7;
+				m.to   = (vlan_qos >> 13) & 0x7;
 				if (nla_put(skb, IFLA_VLAN_QOS_MAPPING,
 					    sizeof(m), &m))
 					goto nla_put_failure;
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index fa67374bda49..0e424e0895b7 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -262,15 +262,19 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 		   vlan->ingress_priority_map[7]);
 
 	seq_printf(seq, " EGRESS priority mappings: ");
+	rcu_read_lock();
 	for (i = 0; i < 16; i++) {
-		const struct vlan_priority_tci_mapping *mp
-			= vlan->egress_priority_map[i];
+		const struct vlan_priority_tci_mapping *mp =
+			rcu_dereference(vlan->egress_priority_map[i]);
 		while (mp) {
+			u16 vlan_qos = READ_ONCE(mp->vlan_qos);
+
 			seq_printf(seq, "%u:%d ",
-				   mp->priority, ((mp->vlan_qos >> 13) & 0x7));
-			mp = mp->next;
+				   mp->priority, ((vlan_qos >> 13) & 0x7));
+			mp = rcu_dereference(mp->next);
 		}
 	}
+	rcu_read_unlock();
 	seq_puts(seq, "\n");
 
 	return 0;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH net v2 2/2] 8021q: delete cleared egress QoS mappings
  2026-04-20  3:18 [PATCH net v2 1/2] 8021q: use RCU for egress QoS mappings Ren Wei
@ 2026-04-20  3:18 ` Ren Wei
  2026-04-22 16:20   ` Simon Horman
  0 siblings, 1 reply; 3+ messages in thread
From: Ren Wei @ 2026-04-20  3:18 UTC (permalink / raw)
  To: netdev, horms, edumazet
  Cc: andrew+netdev, davem, kuba, pabeni, kees, yuantan098, ylong030,
	n05ec, yifanwucs, tomapufckgml, bird

From: Longxuan Yu <ylong030@ucr.edu>

vlan_dev_set_egress_priority() currently keeps cleared egress
priority mappings in the hash as tombstones. Repeated set/clear cycles
with distinct skb priorities therefore accumulate mapping nodes until
device teardown and leak memory.

Delete mappings when vlan_prio is cleared instead of keeping tombstones.
Now that the egress mapping lists are RCU protected, the node can be
unlinked safely and freed after a grace period.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable@kernel.org
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Reported-by: Xin Liu <bird@lzu.edu.cn>
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Longxuan Yu <ylong030@ucr.edu>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
---
 net/8021q/vlan_dev.c     | 20 ++++++++++++++------
 net/8021q/vlan_netlink.c |  4 ----
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index a5340932b657..7aa3af8b10ea 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -172,26 +172,34 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 				 u32 skb_prio, u16 vlan_prio)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+	struct vlan_priority_tci_mapping __rcu **mpp;
 	struct vlan_priority_tci_mapping *mp;
 	struct vlan_priority_tci_mapping *np;
 	u32 bucket = skb_prio & 0xF;
 	u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
 
 	/* See if a priority mapping exists.. */
-	mp = rtnl_dereference(vlan->egress_priority_map[bucket]);
+	mpp = &vlan->egress_priority_map[bucket];
+	mp = rtnl_dereference(*mpp);
 	while (mp) {
 		if (mp->priority == skb_prio) {
-			if (mp->vlan_qos && !vlan_qos)
+			if (!vlan_qos) {
+				rcu_assign_pointer(*mpp, rtnl_dereference(mp->next));
 				vlan->nr_egress_mappings--;
-			else if (!mp->vlan_qos && vlan_qos)
-				vlan->nr_egress_mappings++;
-			WRITE_ONCE(mp->vlan_qos, vlan_qos);
+				kfree_rcu(mp, rcu);
+			} else {
+				WRITE_ONCE(mp->vlan_qos, vlan_qos);
+			}
 			return 0;
 		}
-		mp = rtnl_dereference(mp->next);
+		mpp = &mp->next;
+		mp = rtnl_dereference(*mpp);
 	}
 
 	/* Create a new mapping then. */
+	if (!vlan_qos)
+		return 0;
+
 	np = kmalloc_obj(struct vlan_priority_tci_mapping);
 	if (!np)
 		return -ENOBUFS;
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index a5b16833e2ce..368d53ca7d87 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -263,10 +263,6 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 			for (pm = rcu_dereference_rtnl(vlan->egress_priority_map[i]); pm;
 			     pm = rcu_dereference_rtnl(pm->next)) {
 				u16 vlan_qos = READ_ONCE(pm->vlan_qos);
-
-				if (!vlan_qos)
-					continue;
-
 				m.from = pm->priority;
 				m.to   = (vlan_qos >> 13) & 0x7;
 				if (nla_put(skb, IFLA_VLAN_QOS_MAPPING,
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH net v2 2/2] 8021q: delete cleared egress QoS mappings
  2026-04-20  3:18 ` [PATCH net v2 2/2] 8021q: delete cleared " Ren Wei
@ 2026-04-22 16:20   ` Simon Horman
  0 siblings, 0 replies; 3+ messages in thread
From: Simon Horman @ 2026-04-22 16:20 UTC (permalink / raw)
  To: Ren Wei
  Cc: netdev, edumazet, andrew+netdev, davem, kuba, pabeni, kees,
	yuantan098, ylong030, yifanwucs, tomapufckgml, bird

On Mon, Apr 20, 2026 at 11:18:46AM +0800, Ren Wei wrote:
> From: Longxuan Yu <ylong030@ucr.edu>
> 
> vlan_dev_set_egress_priority() currently keeps cleared egress
> priority mappings in the hash as tombstones. Repeated set/clear cycles
> with distinct skb priorities therefore accumulate mapping nodes until
> device teardown and leak memory.
> 
> Delete mappings when vlan_prio is cleared instead of keeping tombstones.
> Now that the egress mapping lists are RCU protected, the node can be
> unlinked safely and freed after a grace period.
> 
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Cc: stable@kernel.org
> Reported-by: Yifan Wu <yifanwucs@gmail.com>
> Reported-by: Juefei Pu <tomapufckgml@gmail.com>
> Reported-by: Xin Liu <bird@lzu.edu.cn>
> Co-developed-by: Yuan Tan <yuantan098@gmail.com>
> Signed-off-by: Yuan Tan <yuantan098@gmail.com>
> Signed-off-by: Longxuan Yu <ylong030@ucr.edu>
> Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
> ---
>  net/8021q/vlan_dev.c     | 20 ++++++++++++++------
>  net/8021q/vlan_netlink.c |  4 ----
>  2 files changed, 14 insertions(+), 10 deletions(-)
> 
> diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
> index a5340932b657..7aa3af8b10ea 100644
> --- a/net/8021q/vlan_dev.c
> +++ b/net/8021q/vlan_dev.c
> @@ -172,26 +172,34 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
>  				 u32 skb_prio, u16 vlan_prio)
>  {
>  	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
> +	struct vlan_priority_tci_mapping __rcu **mpp;
>  	struct vlan_priority_tci_mapping *mp;
>  	struct vlan_priority_tci_mapping *np;
>  	u32 bucket = skb_prio & 0xF;
>  	u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
>  
>  	/* See if a priority mapping exists.. */
> -	mp = rtnl_dereference(vlan->egress_priority_map[bucket]);
> +	mpp = &vlan->egress_priority_map[bucket];
> +	mp = rtnl_dereference(*mpp);
>  	while (mp) {
>  		if (mp->priority == skb_prio) {
> -			if (mp->vlan_qos && !vlan_qos)
> +			if (!vlan_qos) {
> +				rcu_assign_pointer(*mpp, rtnl_dereference(mp->next));
>  				vlan->nr_egress_mappings--;
> -			else if (!mp->vlan_qos && vlan_qos)
> -				vlan->nr_egress_mappings++;
> -			WRITE_ONCE(mp->vlan_qos, vlan_qos);
> +				kfree_rcu(mp, rcu);
> +			} else {
> +				WRITE_ONCE(mp->vlan_qos, vlan_qos);
> +			}
>  			return 0;
>  		}
> -		mp = rtnl_dereference(mp->next);
> +		mpp = &mp->next;
> +		mp = rtnl_dereference(*mpp);
>  	}

Hi Ren,

Thanks for splitting up the patchset, it is very helpful to me.

It seems to me that the mpp/mp construct used is a bit complex and
stems from the use of a hand-rolled list centred on the next field of
struct vlan_priority_tci_mapping.

I wonder if things can be simplified by moving to a standardised
list construct, such as an hlist, and the helpers available for using it.

>  
>  	/* Create a new mapping then. */
> +	if (!vlan_qos)
> +		return 0;
> +
>  	np = kmalloc_obj(struct vlan_priority_tci_mapping);
>  	if (!np)
>  		return -ENOBUFS;

...

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-04-22 16:20 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-20  3:18 [PATCH net v2 1/2] 8021q: use RCU for egress QoS mappings Ren Wei
2026-04-20  3:18 ` [PATCH net v2 2/2] 8021q: delete cleared " Ren Wei
2026-04-22 16:20   ` Simon Horman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox