* [PATCH net v2 1/2] 8021q: use RCU for egress QoS mappings
@ 2026-04-20 3:18 Ren Wei
2026-04-20 3:18 ` [PATCH net v2 2/2] 8021q: delete cleared " Ren Wei
0 siblings, 1 reply; 3+ messages in thread
From: Ren Wei @ 2026-04-20 3:18 UTC (permalink / raw)
To: netdev, horms, edumazet
Cc: andrew+netdev, davem, kuba, pabeni, kees, yuantan098, ylong030,
n05ec, yifanwucs, tomapufckgml, bird
From: Longxuan Yu <ylong030@ucr.edu>
The TX fast path and reporting paths walk egress QoS mappings without
RTNL. Convert the mapping lists to RCU-protected pointers, use RCU
reader annotations in readers, and defer freeing mapping nodes with an
embedded rcu_head.
This prepares the egress QoS mapping code for safe removal of mapping
nodes in a follow-up change while preserving the current behavior.
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Longxuan Yu <ylong030@ucr.edu>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
---
changes in v2:
- split the original single patch into RCU preparation and bug-fix patches
- embed an rcu_head in mapping nodes and use kfree_rcu(pm, rcu)
- keep the existing tombstone-skipping check in vlan_fill_info() so this
patch preserves behavior
- use rcu_dereference_rtnl() for netlink dump traversal
- In patch 1, sample pm->vlan_qos once with READ_ONCE() in vlan_fill_info()
and reuse the local value, so the RCU preparatory change does not mix a
plain load for the tombstone check with a separate READ_ONCE() for the
exported value
- v1 Link:
https://lore.kernel.org/all/b877895cd02d35254b5c05d3c40abbf130cd87eb.1776039122.git.ylong030@ucr.edu/
Separately, we still think the Fixes tag should point to 1da177e4c3f4, not
b020cb488586, because the underlying bug already exists in 1da177e4c3f4:
clearing an egress QoS mapping leaves the mapping node in the hash instead
of deleting it. Commit b020cb488586 only added nr_egress_mappings
accounting for that pre-existing behavior; it did not introduce the bug
itself.
include/linux/if_vlan.h | 25 ++++++++++++++++---------
net/8021q/vlan_dev.c | 31 ++++++++++++++++---------------
net/8021q/vlan_netlink.c | 10 ++++++----
net/8021q/vlanproc.c | 12 ++++++++----
4 files changed, 46 insertions(+), 32 deletions(-)
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index e6272f9c5e42..20cc16ea4e5a 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -147,11 +147,13 @@ extern __be16 vlan_dev_vlan_proto(const struct net_device *dev);
* @priority: skb priority
* @vlan_qos: vlan priority: (skb->priority << 13) & 0xE000
* @next: pointer to next struct
+ * @rcu: used for deferred freeing of mapping nodes
*/
struct vlan_priority_tci_mapping {
u32 priority;
u16 vlan_qos;
- struct vlan_priority_tci_mapping *next;
+ struct vlan_priority_tci_mapping __rcu *next;
+ struct rcu_head rcu;
};
struct proc_dir_entry;
@@ -177,7 +179,7 @@ struct vlan_dev_priv {
unsigned int nr_ingress_mappings;
u32 ingress_priority_map[8];
unsigned int nr_egress_mappings;
- struct vlan_priority_tci_mapping *egress_priority_map[16];
+ struct vlan_priority_tci_mapping __rcu *egress_priority_map[16];
__be16 vlan_proto;
u16 vlan_id;
@@ -209,19 +211,24 @@ static inline u16
vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio)
{
struct vlan_priority_tci_mapping *mp;
+ u16 vlan_qos = 0;
- smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */
+ rcu_read_lock();
- mp = vlan_dev_priv(dev)->egress_priority_map[(skprio & 0xF)];
+ mp = rcu_dereference(vlan_dev_priv(dev)->egress_priority_map[skprio & 0xF]);
while (mp) {
if (mp->priority == skprio) {
- return mp->vlan_qos; /* This should already be shifted
- * to mask correctly with the
- * VLAN's TCI */
+ vlan_qos = READ_ONCE(mp->vlan_qos);
+ break;
}
- mp = mp->next;
+ mp = rcu_dereference(mp->next);
}
- return 0;
+ rcu_read_unlock();
+
+ /* This should already be shifted to mask correctly with
+ * the VLAN's TCI.
+ */
+ return vlan_qos;
}
extern bool vlan_do_receive(struct sk_buff **skb);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index c40f7d5c4fca..a5340932b657 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -172,39 +172,34 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio)
{
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
- struct vlan_priority_tci_mapping *mp = NULL;
+ struct vlan_priority_tci_mapping *mp;
struct vlan_priority_tci_mapping *np;
+ u32 bucket = skb_prio & 0xF;
u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
/* See if a priority mapping exists.. */
- mp = vlan->egress_priority_map[skb_prio & 0xF];
+ mp = rtnl_dereference(vlan->egress_priority_map[bucket]);
while (mp) {
if (mp->priority == skb_prio) {
if (mp->vlan_qos && !vlan_qos)
vlan->nr_egress_mappings--;
else if (!mp->vlan_qos && vlan_qos)
vlan->nr_egress_mappings++;
- mp->vlan_qos = vlan_qos;
+ WRITE_ONCE(mp->vlan_qos, vlan_qos);
return 0;
}
- mp = mp->next;
+ mp = rtnl_dereference(mp->next);
}
/* Create a new mapping then. */
- mp = vlan->egress_priority_map[skb_prio & 0xF];
np = kmalloc_obj(struct vlan_priority_tci_mapping);
if (!np)
return -ENOBUFS;
- np->next = mp;
np->priority = skb_prio;
np->vlan_qos = vlan_qos;
- /* Before inserting this element in hash table, make sure all its fields
- * are committed to memory.
- * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask()
- */
- smp_wmb();
- vlan->egress_priority_map[skb_prio & 0xF] = np;
+ RCU_INIT_POINTER(np->next, rtnl_dereference(vlan->egress_priority_map[bucket]));
+ rcu_assign_pointer(vlan->egress_priority_map[bucket], np);
if (vlan_qos)
vlan->nr_egress_mappings++;
return 0;
@@ -604,11 +599,17 @@ void vlan_dev_free_egress_priority(const struct net_device *dev)
int i;
for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
- while ((pm = vlan->egress_priority_map[i]) != NULL) {
- vlan->egress_priority_map[i] = pm->next;
- kfree(pm);
+ pm = rtnl_dereference(vlan->egress_priority_map[i]);
+ RCU_INIT_POINTER(vlan->egress_priority_map[i], NULL);
+ while (pm) {
+ struct vlan_priority_tci_mapping *next;
+
+ next = rtnl_dereference(pm->next);
+ kfree_rcu(pm, rcu);
+ pm = next;
}
}
+ vlan->nr_egress_mappings = 0;
}
static void vlan_dev_uninit(struct net_device *dev)
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index a000b1ef0520..a5b16833e2ce 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -260,13 +260,15 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
goto nla_put_failure;
for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
- for (pm = vlan->egress_priority_map[i]; pm;
- pm = pm->next) {
- if (!pm->vlan_qos)
+ for (pm = rcu_dereference_rtnl(vlan->egress_priority_map[i]); pm;
+ pm = rcu_dereference_rtnl(pm->next)) {
+ u16 vlan_qos = READ_ONCE(pm->vlan_qos);
+
+ if (!vlan_qos)
continue;
m.from = pm->priority;
- m.to = (pm->vlan_qos >> 13) & 0x7;
+ m.to = (vlan_qos >> 13) & 0x7;
if (nla_put(skb, IFLA_VLAN_QOS_MAPPING,
sizeof(m), &m))
goto nla_put_failure;
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index fa67374bda49..0e424e0895b7 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -262,15 +262,19 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
vlan->ingress_priority_map[7]);
seq_printf(seq, " EGRESS priority mappings: ");
+ rcu_read_lock();
for (i = 0; i < 16; i++) {
- const struct vlan_priority_tci_mapping *mp
- = vlan->egress_priority_map[i];
+ const struct vlan_priority_tci_mapping *mp =
+ rcu_dereference(vlan->egress_priority_map[i]);
while (mp) {
+ u16 vlan_qos = READ_ONCE(mp->vlan_qos);
+
seq_printf(seq, "%u:%d ",
- mp->priority, ((mp->vlan_qos >> 13) & 0x7));
- mp = mp->next;
+ mp->priority, ((vlan_qos >> 13) & 0x7));
+ mp = rcu_dereference(mp->next);
}
}
+ rcu_read_unlock();
seq_puts(seq, "\n");
return 0;
--
2.43.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH net v2 2/2] 8021q: delete cleared egress QoS mappings
2026-04-20 3:18 [PATCH net v2 1/2] 8021q: use RCU for egress QoS mappings Ren Wei
@ 2026-04-20 3:18 ` Ren Wei
2026-04-22 16:20 ` Simon Horman
0 siblings, 1 reply; 3+ messages in thread
From: Ren Wei @ 2026-04-20 3:18 UTC (permalink / raw)
To: netdev, horms, edumazet
Cc: andrew+netdev, davem, kuba, pabeni, kees, yuantan098, ylong030,
n05ec, yifanwucs, tomapufckgml, bird
From: Longxuan Yu <ylong030@ucr.edu>
vlan_dev_set_egress_priority() currently keeps cleared egress
priority mappings in the hash as tombstones. Repeated set/clear cycles
with distinct skb priorities therefore accumulate mapping nodes until
device teardown and leak memory.
Delete mappings when vlan_prio is cleared instead of keeping tombstones.
Now that the egress mapping lists are RCU protected, the node can be
unlinked safely and freed after a grace period.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable@kernel.org
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Reported-by: Xin Liu <bird@lzu.edu.cn>
Co-developed-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Yuan Tan <yuantan098@gmail.com>
Signed-off-by: Longxuan Yu <ylong030@ucr.edu>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
---
net/8021q/vlan_dev.c | 20 ++++++++++++++------
net/8021q/vlan_netlink.c | 4 ----
2 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index a5340932b657..7aa3af8b10ea 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -172,26 +172,34 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio)
{
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+ struct vlan_priority_tci_mapping __rcu **mpp;
struct vlan_priority_tci_mapping *mp;
struct vlan_priority_tci_mapping *np;
u32 bucket = skb_prio & 0xF;
u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
/* See if a priority mapping exists.. */
- mp = rtnl_dereference(vlan->egress_priority_map[bucket]);
+ mpp = &vlan->egress_priority_map[bucket];
+ mp = rtnl_dereference(*mpp);
while (mp) {
if (mp->priority == skb_prio) {
- if (mp->vlan_qos && !vlan_qos)
+ if (!vlan_qos) {
+ rcu_assign_pointer(*mpp, rtnl_dereference(mp->next));
vlan->nr_egress_mappings--;
- else if (!mp->vlan_qos && vlan_qos)
- vlan->nr_egress_mappings++;
- WRITE_ONCE(mp->vlan_qos, vlan_qos);
+ kfree_rcu(mp, rcu);
+ } else {
+ WRITE_ONCE(mp->vlan_qos, vlan_qos);
+ }
return 0;
}
- mp = rtnl_dereference(mp->next);
+ mpp = &mp->next;
+ mp = rtnl_dereference(*mpp);
}
/* Create a new mapping then. */
+ if (!vlan_qos)
+ return 0;
+
np = kmalloc_obj(struct vlan_priority_tci_mapping);
if (!np)
return -ENOBUFS;
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index a5b16833e2ce..368d53ca7d87 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -263,10 +263,6 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
for (pm = rcu_dereference_rtnl(vlan->egress_priority_map[i]); pm;
pm = rcu_dereference_rtnl(pm->next)) {
u16 vlan_qos = READ_ONCE(pm->vlan_qos);
-
- if (!vlan_qos)
- continue;
-
m.from = pm->priority;
m.to = (vlan_qos >> 13) & 0x7;
if (nla_put(skb, IFLA_VLAN_QOS_MAPPING,
--
2.43.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH net v2 2/2] 8021q: delete cleared egress QoS mappings
2026-04-20 3:18 ` [PATCH net v2 2/2] 8021q: delete cleared " Ren Wei
@ 2026-04-22 16:20 ` Simon Horman
0 siblings, 0 replies; 3+ messages in thread
From: Simon Horman @ 2026-04-22 16:20 UTC (permalink / raw)
To: Ren Wei
Cc: netdev, edumazet, andrew+netdev, davem, kuba, pabeni, kees,
yuantan098, ylong030, yifanwucs, tomapufckgml, bird
On Mon, Apr 20, 2026 at 11:18:46AM +0800, Ren Wei wrote:
> From: Longxuan Yu <ylong030@ucr.edu>
>
> vlan_dev_set_egress_priority() currently keeps cleared egress
> priority mappings in the hash as tombstones. Repeated set/clear cycles
> with distinct skb priorities therefore accumulate mapping nodes until
> device teardown and leak memory.
>
> Delete mappings when vlan_prio is cleared instead of keeping tombstones.
> Now that the egress mapping lists are RCU protected, the node can be
> unlinked safely and freed after a grace period.
>
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Cc: stable@kernel.org
> Reported-by: Yifan Wu <yifanwucs@gmail.com>
> Reported-by: Juefei Pu <tomapufckgml@gmail.com>
> Reported-by: Xin Liu <bird@lzu.edu.cn>
> Co-developed-by: Yuan Tan <yuantan098@gmail.com>
> Signed-off-by: Yuan Tan <yuantan098@gmail.com>
> Signed-off-by: Longxuan Yu <ylong030@ucr.edu>
> Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
> ---
> net/8021q/vlan_dev.c | 20 ++++++++++++++------
> net/8021q/vlan_netlink.c | 4 ----
> 2 files changed, 14 insertions(+), 10 deletions(-)
>
> diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
> index a5340932b657..7aa3af8b10ea 100644
> --- a/net/8021q/vlan_dev.c
> +++ b/net/8021q/vlan_dev.c
> @@ -172,26 +172,34 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
> u32 skb_prio, u16 vlan_prio)
> {
> struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
> + struct vlan_priority_tci_mapping __rcu **mpp;
> struct vlan_priority_tci_mapping *mp;
> struct vlan_priority_tci_mapping *np;
> u32 bucket = skb_prio & 0xF;
> u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
>
> /* See if a priority mapping exists.. */
> - mp = rtnl_dereference(vlan->egress_priority_map[bucket]);
> + mpp = &vlan->egress_priority_map[bucket];
> + mp = rtnl_dereference(*mpp);
> while (mp) {
> if (mp->priority == skb_prio) {
> - if (mp->vlan_qos && !vlan_qos)
> + if (!vlan_qos) {
> + rcu_assign_pointer(*mpp, rtnl_dereference(mp->next));
> vlan->nr_egress_mappings--;
> - else if (!mp->vlan_qos && vlan_qos)
> - vlan->nr_egress_mappings++;
> - WRITE_ONCE(mp->vlan_qos, vlan_qos);
> + kfree_rcu(mp, rcu);
> + } else {
> + WRITE_ONCE(mp->vlan_qos, vlan_qos);
> + }
> return 0;
> }
> - mp = rtnl_dereference(mp->next);
> + mpp = &mp->next;
> + mp = rtnl_dereference(*mpp);
> }
Hi Ren,
Thanks for splitting up the patchset, it is very helpful to me.
It seems to me that the mpp/mp construct used is a bit complex and
stems from the use of a hand-rolled list centred on the next field of
struct vlan_priority_tci_mapping.
I wonder if things can be simplified by moving to a standardised
list construct, such as an hlist, and the helpers available for using it.
>
> /* Create a new mapping then. */
> + if (!vlan_qos)
> + return 0;
> +
> np = kmalloc_obj(struct vlan_priority_tci_mapping);
> if (!np)
> return -ENOBUFS;
...
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-04-22 16:20 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-20 3:18 [PATCH net v2 1/2] 8021q: use RCU for egress QoS mappings Ren Wei
2026-04-20 3:18 ` [PATCH net v2 2/2] 8021q: delete cleared " Ren Wei
2026-04-22 16:20 ` Simon Horman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox