Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next v2 03/14] net: macb: unify queue index variable naming convention and types
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

Variables are named q or queue_index. Types are int, unsigned int, u32
and u16. Use `unsigned int q` everywhere.

Skip over taprio functions. They use `u8 queue_id` which fits with the
`struct macb_queue_enst_config` field. Using `queue_id` everywhere
would be too verbose.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index a8a7df615d25..b0e70f6ce305 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -877,7 +877,7 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue)
 static void gem_shuffle_tx_rings(struct macb *bp)
 {
 	struct macb_queue *queue;
-	int q;
+	unsigned int q;
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; q++, queue++)
 		gem_shuffle_tx_one_ring(queue);
@@ -1258,7 +1258,7 @@ static void macb_tx_error_task(struct work_struct *work)
 						      tx_error_task);
 	bool			halt_timeout = false;
 	struct macb		*bp = queue->bp;
-	u32			queue_index;
+	unsigned int		q;
 	u32			packets = 0;
 	u32			bytes = 0;
 	struct macb_tx_skb	*tx_skb;
@@ -1267,9 +1267,9 @@ static void macb_tx_error_task(struct work_struct *work)
 	unsigned int		tail;
 	unsigned long		flags;
 
-	queue_index = queue - bp->queues;
+	q = queue - bp->queues;
 	netdev_vdbg(bp->netdev, "macb_tx_error_task: q = %u, t = %u, h = %u\n",
-		    queue_index, queue->tx_tail, queue->tx_head);
+		    q, queue->tx_tail, queue->tx_head);
 
 	/* Prevent the queue NAPI TX poll from running, as it calls
 	 * macb_tx_complete(), which in turn may call netif_wake_subqueue().
@@ -1342,7 +1342,7 @@ static void macb_tx_error_task(struct work_struct *work)
 		macb_tx_unmap(bp, tx_skb, 0);
 	}
 
-	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, queue_index),
+	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, q),
 				  packets, bytes);
 
 	/* Set end of TX queue */
@@ -1407,7 +1407,7 @@ static bool ptp_one_step_sync(struct sk_buff *skb)
 static int macb_tx_complete(struct macb_queue *queue, int budget)
 {
 	struct macb *bp = queue->bp;
-	u16 queue_index = queue - bp->queues;
+	unsigned int q = queue - bp->queues;
 	unsigned long flags;
 	unsigned int tail;
 	unsigned int head;
@@ -1469,14 +1469,14 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 		}
 	}
 
-	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, queue_index),
+	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, q),
 				  packets, bytes);
 
 	queue->tx_tail = tail;
-	if (__netif_subqueue_stopped(bp->netdev, queue_index) &&
+	if (__netif_subqueue_stopped(bp->netdev, q) &&
 	    CIRC_CNT(queue->tx_head, queue->tx_tail,
 		     bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
-		netif_wake_subqueue(bp->netdev, queue_index);
+		netif_wake_subqueue(bp->netdev, q);
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
 
 	if (packets)
@@ -2470,10 +2470,10 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *netdev)
 static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 				   struct net_device *netdev)
 {
-	u16 queue_index = skb_get_queue_mapping(skb);
 	struct macb *bp = netdev_priv(netdev);
-	struct macb_queue *queue = &bp->queues[queue_index];
+	unsigned int q = skb_get_queue_mapping(skb);
 	unsigned int desc_cnt, nr_frags, frag_size, f;
+	struct macb_queue *queue = &bp->queues[q];
 	unsigned int hdrlen;
 	unsigned long flags;
 	bool is_lso;
@@ -2513,7 +2513,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 #if defined(DEBUG) && defined(VERBOSE_DEBUG)
 	netdev_vdbg(bp->netdev,
 		    "start_xmit: queue %hu len %u head %p data %p tail %p end %p\n",
-		    queue_index, skb->len, skb->head, skb->data,
+		    q, skb->len, skb->head, skb->data,
 		    skb_tail_pointer(skb), skb_end_pointer(skb));
 	print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_OFFSET, 16, 1,
 		       skb->data, 16, true);
@@ -2539,7 +2539,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	/* This is a hard error, log it. */
 	if (CIRC_SPACE(queue->tx_head, queue->tx_tail,
 		       bp->tx_ring_size) < desc_cnt) {
-		netif_stop_subqueue(netdev, queue_index);
+		netif_stop_subqueue(netdev, q);
 		netdev_dbg(netdev, "tx_head = %u, tx_tail = %u\n",
 			   queue->tx_head, queue->tx_tail);
 		ret = NETDEV_TX_BUSY;
@@ -2555,7 +2555,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	/* Make newly initialized descriptor visible to hardware */
 	wmb();
 	skb_tx_timestamp(skb);
-	netdev_tx_sent_queue(netdev_get_tx_queue(bp->netdev, queue_index),
+	netdev_tx_sent_queue(netdev_get_tx_queue(bp->netdev, q),
 			     skb->len);
 
 	spin_lock(&bp->lock);
@@ -2564,7 +2564,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	spin_unlock(&bp->lock);
 
 	if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1)
-		netif_stop_subqueue(netdev, queue_index);
+		netif_stop_subqueue(netdev, q);
 
 unlock:
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 02/14] net: macb: unify `struct macb *` naming convention
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

For historical reasons, MACB has both:

   struct macb *bp;
   struct macb *lp; // used in at91ether functions

Use only the former.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 176 ++++++++++++++++---------------
 1 file changed, 91 insertions(+), 85 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 896d481e0f95..a8a7df615d25 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -4938,71 +4938,72 @@ static const struct macb_usrio_config at91_default_usrio = {
 
 static struct sifive_fu540_macb_mgmt *mgmt;
 
-static int at91ether_alloc_coherent(struct macb *lp)
+static int at91ether_alloc_coherent(struct macb *bp)
 {
-	struct macb_queue *q = &lp->queues[0];
+	struct macb_queue *queue = &bp->queues[0];
 
-	q->rx_ring = dma_alloc_coherent(&lp->pdev->dev,
-					 (AT91ETHER_MAX_RX_DESCR *
-					  macb_dma_desc_get_size(lp)),
-					 &q->rx_ring_dma, GFP_KERNEL);
-	if (!q->rx_ring)
+	queue->rx_ring = dma_alloc_coherent(&bp->pdev->dev,
+					    (AT91ETHER_MAX_RX_DESCR *
+					     macb_dma_desc_get_size(bp)),
+					    &queue->rx_ring_dma, GFP_KERNEL);
+	if (!queue->rx_ring)
 		return -ENOMEM;
 
-	q->rx_buffers = dma_alloc_coherent(&lp->pdev->dev,
-					    AT91ETHER_MAX_RX_DESCR *
-					    AT91ETHER_MAX_RBUFF_SZ,
-					    &q->rx_buffers_dma, GFP_KERNEL);
-	if (!q->rx_buffers) {
-		dma_free_coherent(&lp->pdev->dev,
+	queue->rx_buffers = dma_alloc_coherent(&bp->pdev->dev,
+					       AT91ETHER_MAX_RX_DESCR *
+					       AT91ETHER_MAX_RBUFF_SZ,
+					       &queue->rx_buffers_dma,
+					       GFP_KERNEL);
+	if (!queue->rx_buffers) {
+		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
-				  macb_dma_desc_get_size(lp),
-				  q->rx_ring, q->rx_ring_dma);
-		q->rx_ring = NULL;
+				  macb_dma_desc_get_size(bp),
+				  queue->rx_ring, queue->rx_ring_dma);
+		queue->rx_ring = NULL;
 		return -ENOMEM;
 	}
 
 	return 0;
 }
 
-static void at91ether_free_coherent(struct macb *lp)
+static void at91ether_free_coherent(struct macb *bp)
 {
-	struct macb_queue *q = &lp->queues[0];
+	struct macb_queue *queue = &bp->queues[0];
 
-	if (q->rx_ring) {
-		dma_free_coherent(&lp->pdev->dev,
+	if (queue->rx_ring) {
+		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
-				  macb_dma_desc_get_size(lp),
-				  q->rx_ring, q->rx_ring_dma);
-		q->rx_ring = NULL;
+				  macb_dma_desc_get_size(bp),
+				  queue->rx_ring, queue->rx_ring_dma);
+		queue->rx_ring = NULL;
 	}
 
-	if (q->rx_buffers) {
-		dma_free_coherent(&lp->pdev->dev,
+	if (queue->rx_buffers) {
+		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
 				  AT91ETHER_MAX_RBUFF_SZ,
-				  q->rx_buffers, q->rx_buffers_dma);
-		q->rx_buffers = NULL;
+				  queue->rx_buffers, queue->rx_buffers_dma);
+		queue->rx_buffers = NULL;
 	}
 }
 
 /* Initialize and start the Receiver and Transmit subsystems */
-static int at91ether_start(struct macb *lp)
+static int at91ether_start(struct macb *bp)
 {
-	struct macb_queue *q = &lp->queues[0];
+	struct macb_queue *queue = &bp->queues[0];
 	struct macb_dma_desc *desc;
 	dma_addr_t addr;
 	u32 ctl;
 	int i, ret;
 
-	ret = at91ether_alloc_coherent(lp);
+	ret = at91ether_alloc_coherent(bp);
 	if (ret)
 		return ret;
 
-	addr = q->rx_buffers_dma;
+	addr = queue->rx_buffers_dma;
 	for (i = 0; i < AT91ETHER_MAX_RX_DESCR; i++) {
-		desc = macb_rx_desc(q, i);
-		macb_set_addr(lp, desc, addr);
+		desc = macb_rx_desc(queue, i);
+		macb_set_addr(bp, desc, addr);
 		desc->ctrl = 0;
 		addr += AT91ETHER_MAX_RBUFF_SZ;
 	}
@@ -5011,17 +5012,17 @@ static int at91ether_start(struct macb *lp)
 	desc->addr |= MACB_BIT(RX_WRAP);
 
 	/* Reset buffer index */
-	q->rx_tail = 0;
+	queue->rx_tail = 0;
 
 	/* Program address of descriptor list in Rx Buffer Queue register */
-	macb_writel(lp, RBQP, q->rx_ring_dma);
+	macb_writel(bp, RBQP, queue->rx_ring_dma);
 
 	/* Enable Receive and Transmit */
-	ctl = macb_readl(lp, NCR);
-	macb_writel(lp, NCR, ctl | MACB_BIT(RE) | MACB_BIT(TE));
+	ctl = macb_readl(bp, NCR);
+	macb_writel(bp, NCR, ctl | MACB_BIT(RE) | MACB_BIT(TE));
 
 	/* Enable MAC interrupts */
-	macb_writel(lp, IER, MACB_BIT(RCOMP)	|
+	macb_writel(bp, IER, MACB_BIT(RCOMP)	|
 			     MACB_BIT(RXUBR)	|
 			     MACB_BIT(ISR_TUND)	|
 			     MACB_BIT(ISR_RLE)	|
@@ -5032,12 +5033,12 @@ static int at91ether_start(struct macb *lp)
 	return 0;
 }
 
-static void at91ether_stop(struct macb *lp)
+static void at91ether_stop(struct macb *bp)
 {
 	u32 ctl;
 
 	/* Disable MAC interrupts */
-	macb_writel(lp, IDR, MACB_BIT(RCOMP)	|
+	macb_writel(bp, IDR, MACB_BIT(RCOMP)	|
 			     MACB_BIT(RXUBR)	|
 			     MACB_BIT(ISR_TUND)	|
 			     MACB_BIT(ISR_RLE)	|
@@ -5046,35 +5047,35 @@ static void at91ether_stop(struct macb *lp)
 			     MACB_BIT(HRESP));
 
 	/* Disable Receiver and Transmitter */
-	ctl = macb_readl(lp, NCR);
-	macb_writel(lp, NCR, ctl & ~(MACB_BIT(TE) | MACB_BIT(RE)));
+	ctl = macb_readl(bp, NCR);
+	macb_writel(bp, NCR, ctl & ~(MACB_BIT(TE) | MACB_BIT(RE)));
 
 	/* Free resources. */
-	at91ether_free_coherent(lp);
+	at91ether_free_coherent(bp);
 }
 
 /* Open the ethernet interface */
 static int at91ether_open(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(netdev);
+	struct macb *bp = netdev_priv(netdev);
 	u32 ctl;
 	int ret;
 
-	ret = pm_runtime_resume_and_get(&lp->pdev->dev);
+	ret = pm_runtime_resume_and_get(&bp->pdev->dev);
 	if (ret < 0)
 		return ret;
 
 	/* Clear internal statistics */
-	ctl = macb_readl(lp, NCR);
-	macb_writel(lp, NCR, ctl | MACB_BIT(CLRSTAT));
+	ctl = macb_readl(bp, NCR);
+	macb_writel(bp, NCR, ctl | MACB_BIT(CLRSTAT));
 
-	macb_set_hwaddr(lp);
+	macb_set_hwaddr(bp);
 
-	ret = at91ether_start(lp);
+	ret = at91ether_start(bp);
 	if (ret)
 		goto pm_exit;
 
-	ret = macb_phylink_connect(lp);
+	ret = macb_phylink_connect(bp);
 	if (ret)
 		goto stop;
 
@@ -5083,25 +5084,25 @@ static int at91ether_open(struct net_device *netdev)
 	return 0;
 
 stop:
-	at91ether_stop(lp);
+	at91ether_stop(bp);
 pm_exit:
-	pm_runtime_put_sync(&lp->pdev->dev);
+	pm_runtime_put_sync(&bp->pdev->dev);
 	return ret;
 }
 
 /* Close the interface */
 static int at91ether_close(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(netdev);
+	struct macb *bp = netdev_priv(netdev);
 
 	netif_stop_queue(netdev);
 
-	phylink_stop(lp->phylink);
-	phylink_disconnect_phy(lp->phylink);
+	phylink_stop(bp->phylink);
+	phylink_disconnect_phy(bp->phylink);
 
-	at91ether_stop(lp);
+	at91ether_stop(bp);
 
-	pm_runtime_put(&lp->pdev->dev);
+	pm_runtime_put(&bp->pdev->dev);
 
 	return 0;
 }
@@ -5110,19 +5111,21 @@ static int at91ether_close(struct net_device *netdev)
 static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 					struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(netdev);
+	struct macb *bp = netdev_priv(netdev);
+	struct device *dev = &bp->pdev->dev;
 
-	if (macb_readl(lp, TSR) & MACB_BIT(RM9200_BNQ)) {
+	if (macb_readl(bp, TSR) & MACB_BIT(RM9200_BNQ)) {
 		int desc = 0;
 
 		netif_stop_queue(netdev);
 
 		/* Store packet information (to free when Tx completed) */
-		lp->rm9200_txq[desc].skb = skb;
-		lp->rm9200_txq[desc].size = skb->len;
-		lp->rm9200_txq[desc].mapping = dma_map_single(&lp->pdev->dev, skb->data,
-							      skb->len, DMA_TO_DEVICE);
-		if (dma_mapping_error(&lp->pdev->dev, lp->rm9200_txq[desc].mapping)) {
+		bp->rm9200_txq[desc].skb = skb;
+		bp->rm9200_txq[desc].size = skb->len;
+		bp->rm9200_txq[desc].mapping = dma_map_single(dev, skb->data,
+							      skb->len,
+							      DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, bp->rm9200_txq[desc].mapping)) {
 			dev_kfree_skb_any(skb);
 			netdev->stats.tx_dropped++;
 			netdev_err(netdev, "%s: DMA mapping error\n", __func__);
@@ -5130,9 +5133,9 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 		}
 
 		/* Set address of the data in the Transmit Address register */
-		macb_writel(lp, TAR, lp->rm9200_txq[desc].mapping);
+		macb_writel(bp, TAR, bp->rm9200_txq[desc].mapping);
 		/* Set length of the packet in the Transmit Control register */
-		macb_writel(lp, TCR, skb->len);
+		macb_writel(bp, TCR, skb->len);
 
 	} else {
 		netdev_err(netdev, "%s called, but device is busy!\n",
@@ -5148,16 +5151,17 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
  */
 static void at91ether_rx(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(netdev);
-	struct macb_queue *q = &lp->queues[0];
+	struct macb *bp = netdev_priv(netdev);
+	struct macb_queue *queue = &bp->queues[0];
 	struct macb_dma_desc *desc;
 	unsigned char *p_recv;
 	struct sk_buff *skb;
 	unsigned int pktlen;
 
-	desc = macb_rx_desc(q, q->rx_tail);
+	desc = macb_rx_desc(queue, queue->rx_tail);
 	while (desc->addr & MACB_BIT(RX_USED)) {
-		p_recv = q->rx_buffers + q->rx_tail * AT91ETHER_MAX_RBUFF_SZ;
+		p_recv = queue->rx_buffers +
+			 queue->rx_tail * AT91ETHER_MAX_RBUFF_SZ;
 		pktlen = MACB_BF(RX_FRMLEN, desc->ctrl);
 		skb = netdev_alloc_skb(netdev, pktlen + 2);
 		if (skb) {
@@ -5179,12 +5183,12 @@ static void at91ether_rx(struct net_device *netdev)
 		desc->addr &= ~MACB_BIT(RX_USED);
 
 		/* wrap after last buffer */
-		if (q->rx_tail == AT91ETHER_MAX_RX_DESCR - 1)
-			q->rx_tail = 0;
+		if (queue->rx_tail == AT91ETHER_MAX_RX_DESCR - 1)
+			queue->rx_tail = 0;
 		else
-			q->rx_tail++;
+			queue->rx_tail++;
 
-		desc = macb_rx_desc(q, q->rx_tail);
+		desc = macb_rx_desc(queue, queue->rx_tail);
 	}
 }
 
@@ -5192,14 +5196,14 @@ static void at91ether_rx(struct net_device *netdev)
 static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 {
 	struct net_device *netdev = dev_id;
-	struct macb *lp = netdev_priv(netdev);
+	struct macb *bp = netdev_priv(netdev);
 	u32 intstatus, ctl;
 	unsigned int desc;
 
 	/* MAC Interrupt Status register indicates what interrupts are pending.
 	 * It is automatically cleared once read.
 	 */
-	intstatus = macb_readl(lp, ISR);
+	intstatus = macb_readl(bp, ISR);
 
 	/* Receive complete */
 	if (intstatus & MACB_BIT(RCOMP))
@@ -5212,23 +5216,25 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 			netdev->stats.tx_errors++;
 
 		desc = 0;
-		if (lp->rm9200_txq[desc].skb) {
-			dev_consume_skb_irq(lp->rm9200_txq[desc].skb);
-			lp->rm9200_txq[desc].skb = NULL;
-			dma_unmap_single(&lp->pdev->dev, lp->rm9200_txq[desc].mapping,
-					 lp->rm9200_txq[desc].size, DMA_TO_DEVICE);
+		if (bp->rm9200_txq[desc].skb) {
+			dev_consume_skb_irq(bp->rm9200_txq[desc].skb);
+			bp->rm9200_txq[desc].skb = NULL;
+			dma_unmap_single(&bp->pdev->dev,
+					 bp->rm9200_txq[desc].mapping,
+					 bp->rm9200_txq[desc].size,
+					 DMA_TO_DEVICE);
 			netdev->stats.tx_packets++;
-			netdev->stats.tx_bytes += lp->rm9200_txq[desc].size;
+			netdev->stats.tx_bytes += bp->rm9200_txq[desc].size;
 		}
 		netif_wake_queue(netdev);
 	}
 
 	/* Work-around for EMAC Errata section 41.3.1 */
 	if (intstatus & MACB_BIT(RXUBR)) {
-		ctl = macb_readl(lp, NCR);
-		macb_writel(lp, NCR, ctl & ~MACB_BIT(RE));
+		ctl = macb_readl(bp, NCR);
+		macb_writel(bp, NCR, ctl & ~MACB_BIT(RE));
 		wmb();
-		macb_writel(lp, NCR, ctl | MACB_BIT(RE));
+		macb_writel(bp, NCR, ctl | MACB_BIT(RE));
 	}
 
 	if (intstatus & MACB_BIT(ISR_ROVR))

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 01/14] net: macb: unify device pointer naming convention
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

Here are all device pointer variable permutations inside MACB:

   struct device *dev;
   struct net_device *dev;
   struct net_device *ndev;
   struct net_device *netdev;
   struct pci_dev *pdev;              // inside macb_pci.c
   struct platform_device *pdev;
   struct platform_device *plat_dev;  // inside macb_pci.c

Unify to this convention:

   struct device *dev;
   struct net_device *netdev;
   struct pci_dev *pci;
   struct platform_device *pdev;

Ensure nothing slipped through using ctags tooling:

⟩ ctags -o - --kinds-c='{local}{member}{parameter}' \
    --fields='{typeref}' drivers/net/ethernet/cadence/* | \
  awk -F"\t" '
    $NF~/struct:.*(device|dev) / {print $NF, $1}' | \
  sort -u
typeref:struct:device * dev
typeref:struct:in_device * idev        // ignored
typeref:struct:net_device * netdev
typeref:struct:pci_dev * pci
typeref:struct:phy_device * phy        // ignored
typeref:struct:phy_device * phydev     // ignored
typeref:struct:platform_device * pdev

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb.h      |  20 +-
 drivers/net/ethernet/cadence/macb_main.c | 632 ++++++++++++++++---------------
 drivers/net/ethernet/cadence/macb_pci.c  |  46 +--
 drivers/net/ethernet/cadence/macb_ptp.c  |  18 +-
 4 files changed, 359 insertions(+), 357 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 2de56017ee0d..9857df5b57f0 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1207,11 +1207,11 @@ struct macb_or_gem_ops {
 
 /* MACB-PTP interface: adapt to platform needs. */
 struct macb_ptp_info {
-	void (*ptp_init)(struct net_device *ndev);
-	void (*ptp_remove)(struct net_device *ndev);
+	void (*ptp_init)(struct net_device *netdev);
+	void (*ptp_remove)(struct net_device *netdev);
 	s32 (*get_ptp_max_adj)(void);
 	unsigned int (*get_tsu_rate)(struct macb *bp);
-	int (*get_ts_info)(struct net_device *dev,
+	int (*get_ts_info)(struct net_device *netdev,
 			   struct kernel_ethtool_ts_info *info);
 	int (*get_hwtst)(struct net_device *netdev,
 			 struct kernel_hwtstamp_config *tstamp_config);
@@ -1326,7 +1326,7 @@ struct macb {
 	struct clk		*tx_clk;
 	struct clk		*rx_clk;
 	struct clk		*tsu_clk;
-	struct net_device	*dev;
+	struct net_device	*netdev;
 	/* Protects hw_stats and ethtool_stats */
 	spinlock_t		stats_lock;
 	union {
@@ -1406,8 +1406,8 @@ enum macb_bd_control {
 	TSTAMP_ALL_FRAMES,
 };
 
-void gem_ptp_init(struct net_device *ndev);
-void gem_ptp_remove(struct net_device *ndev);
+void gem_ptp_init(struct net_device *netdev);
+void gem_ptp_remove(struct net_device *netdev);
 void gem_ptp_txstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc);
 void gem_ptp_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc);
 static inline void gem_ptp_do_txstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc)
@@ -1426,14 +1426,14 @@ static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, stru
 	gem_ptp_rxstamp(bp, skb, desc);
 }
 
-int gem_get_hwtst(struct net_device *dev,
+int gem_get_hwtst(struct net_device *netdev,
 		  struct kernel_hwtstamp_config *tstamp_config);
-int gem_set_hwtst(struct net_device *dev,
+int gem_set_hwtst(struct net_device *netdev,
 		  struct kernel_hwtstamp_config *tstamp_config,
 		  struct netlink_ext_ack *extack);
 #else
-static inline void gem_ptp_init(struct net_device *ndev) { }
-static inline void gem_ptp_remove(struct net_device *ndev) { }
+static inline void gem_ptp_init(struct net_device *netdev) { }
+static inline void gem_ptp_remove(struct net_device *netdev) { }
 
 static inline void gem_ptp_do_txstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc) { }
 static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc) { }
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index d9716c56f705..896d481e0f95 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -252,9 +252,9 @@ static void macb_set_hwaddr(struct macb *bp)
 	u32 bottom;
 	u16 top;
 
-	bottom = get_unaligned_le32(bp->dev->dev_addr);
+	bottom = get_unaligned_le32(bp->netdev->dev_addr);
 	macb_or_gem_writel(bp, SA1B, bottom);
-	top = get_unaligned_le16(bp->dev->dev_addr + 4);
+	top = get_unaligned_le16(bp->netdev->dev_addr + 4);
 	macb_or_gem_writel(bp, SA1T, top);
 
 	if (gem_has_ptp(bp)) {
@@ -291,13 +291,13 @@ static void macb_get_hwaddr(struct macb *bp)
 		addr[5] = (top >> 8) & 0xff;
 
 		if (is_valid_ether_addr(addr)) {
-			eth_hw_addr_set(bp->dev, addr);
+			eth_hw_addr_set(bp->netdev, addr);
 			return;
 		}
 	}
 
 	dev_info(&bp->pdev->dev, "invalid hw address, using random\n");
-	eth_hw_addr_random(bp->dev);
+	eth_hw_addr_random(bp->netdev);
 }
 
 static int macb_mdio_wait_for_idle(struct macb *bp)
@@ -509,12 +509,12 @@ static void macb_set_tx_clk(struct macb *bp, int speed)
 	ferr = abs(rate_rounded - rate);
 	ferr = DIV_ROUND_UP(ferr, rate / 100000);
 	if (ferr > 5)
-		netdev_warn(bp->dev,
+		netdev_warn(bp->netdev,
 			    "unable to generate target frequency: %ld Hz\n",
 			    rate);
 
 	if (clk_set_rate(bp->tx_clk, rate_rounded))
-		netdev_err(bp->dev, "adjusting tx_clk failed.\n");
+		netdev_err(bp->netdev, "adjusting tx_clk failed.\n");
 }
 
 static void macb_usx_pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
@@ -697,8 +697,8 @@ static void macb_tx_lpi_wake(struct macb *bp)
 
 static void macb_mac_disable_tx_lpi(struct phylink_config *config)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 	unsigned long flags;
 
 	cancel_delayed_work_sync(&bp->tx_lpi_work);
@@ -712,8 +712,8 @@ static void macb_mac_disable_tx_lpi(struct phylink_config *config)
 static int macb_mac_enable_tx_lpi(struct phylink_config *config, u32 timer,
 				  bool tx_clk_stop)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 	unsigned long flags;
 
 	spin_lock_irqsave(&bp->lock, flags);
@@ -732,8 +732,8 @@ static int macb_mac_enable_tx_lpi(struct phylink_config *config, u32 timer,
 static void macb_mac_config(struct phylink_config *config, unsigned int mode,
 			    const struct phylink_link_state *state)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 	unsigned long flags;
 	u32 old_ctrl, ctrl;
 	u32 old_ncr, ncr;
@@ -774,8 +774,8 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode,
 static void macb_mac_link_down(struct phylink_config *config, unsigned int mode,
 			       phy_interface_t interface)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned int q;
 	u32 ctrl;
@@ -789,7 +789,7 @@ static void macb_mac_link_down(struct phylink_config *config, unsigned int mode,
 	ctrl = macb_readl(bp, NCR) & ~(MACB_BIT(RE) | MACB_BIT(TE));
 	macb_writel(bp, NCR, ctrl);
 
-	netif_tx_stop_all_queues(ndev);
+	netif_tx_stop_all_queues(netdev);
 }
 
 /* Use juggling algorithm to left rotate tx ring and tx skb array */
@@ -884,13 +884,13 @@ static void gem_shuffle_tx_rings(struct macb *bp)
 }
 
 static void macb_mac_link_up(struct phylink_config *config,
-			     struct phy_device *phy,
+			     struct phy_device *phydev,
 			     unsigned int mode, phy_interface_t interface,
 			     int speed, int duplex,
 			     bool tx_pause, bool rx_pause)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned long flags;
 	unsigned int q;
@@ -946,14 +946,14 @@ static void macb_mac_link_up(struct phylink_config *config,
 
 	macb_writel(bp, NCR, ctrl | MACB_BIT(RE) | MACB_BIT(TE));
 
-	netif_tx_wake_all_queues(ndev);
+	netif_tx_wake_all_queues(netdev);
 }
 
 static struct phylink_pcs *macb_mac_select_pcs(struct phylink_config *config,
 					       phy_interface_t interface)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	if (interface == PHY_INTERFACE_MODE_10GBASER)
 		return &bp->phylink_usx_pcs;
@@ -982,7 +982,7 @@ static bool macb_phy_handle_exists(struct device_node *dn)
 static int macb_phylink_connect(struct macb *bp)
 {
 	struct device_node *dn = bp->pdev->dev.of_node;
-	struct net_device *dev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	struct phy_device *phydev;
 	int ret;
 
@@ -992,7 +992,7 @@ static int macb_phylink_connect(struct macb *bp)
 	if (!dn || (ret && !macb_phy_handle_exists(dn))) {
 		phydev = phy_find_first(bp->mii_bus);
 		if (!phydev) {
-			netdev_err(dev, "no PHY found\n");
+			netdev_err(netdev, "no PHY found\n");
 			return -ENXIO;
 		}
 
@@ -1001,7 +1001,7 @@ static int macb_phylink_connect(struct macb *bp)
 	}
 
 	if (ret) {
-		netdev_err(dev, "Could not attach PHY (%d)\n", ret);
+		netdev_err(netdev, "Could not attach PHY (%d)\n", ret);
 		return ret;
 	}
 
@@ -1013,21 +1013,21 @@ static int macb_phylink_connect(struct macb *bp)
 static void macb_get_pcs_fixed_state(struct phylink_config *config,
 				     struct phylink_link_state *state)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	state->link = (macb_readl(bp, NSR) & MACB_BIT(NSR_LINK)) != 0;
 }
 
 /* based on au1000_eth. c*/
-static int macb_mii_probe(struct net_device *dev)
+static int macb_mii_probe(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	bp->phylink_sgmii_pcs.ops = &macb_phylink_pcs_ops;
 	bp->phylink_usx_pcs.ops = &macb_phylink_usx_pcs_ops;
 
-	bp->phylink_config.dev = &dev->dev;
+	bp->phylink_config.dev = &netdev->dev;
 	bp->phylink_config.type = PHYLINK_NETDEV;
 	bp->phylink_config.mac_managed_pm = true;
 
@@ -1086,7 +1086,7 @@ static int macb_mii_probe(struct net_device *dev)
 	bp->phylink = phylink_create(&bp->phylink_config, bp->pdev->dev.fwnode,
 				     bp->phy_interface, &macb_phylink_ops);
 	if (IS_ERR(bp->phylink)) {
-		netdev_err(dev, "Could not create a phylink instance (%ld)\n",
+		netdev_err(netdev, "Could not create a phylink instance (%ld)\n",
 			   PTR_ERR(bp->phylink));
 		return PTR_ERR(bp->phylink);
 	}
@@ -1133,7 +1133,7 @@ static int macb_mii_init(struct macb *bp)
 	 */
 	mdio_np = of_get_child_by_name(np, "mdio");
 	if (!mdio_np && of_phy_is_fixed_link(np))
-		return macb_mii_probe(bp->dev);
+		return macb_mii_probe(bp->netdev);
 
 	/* Enable management port */
 	macb_writel(bp, NCR, MACB_BIT(MPE));
@@ -1154,13 +1154,13 @@ static int macb_mii_init(struct macb *bp)
 	bp->mii_bus->priv = bp;
 	bp->mii_bus->parent = &bp->pdev->dev;
 
-	dev_set_drvdata(&bp->dev->dev, bp->mii_bus);
+	dev_set_drvdata(&bp->netdev->dev, bp->mii_bus);
 
 	err = macb_mdiobus_register(bp, mdio_np);
 	if (err)
 		goto err_out_free_mdiobus;
 
-	err = macb_mii_probe(bp->dev);
+	err = macb_mii_probe(bp->netdev);
 	if (err)
 		goto err_out_unregister_bus;
 
@@ -1268,7 +1268,7 @@ static void macb_tx_error_task(struct work_struct *work)
 	unsigned long		flags;
 
 	queue_index = queue - bp->queues;
-	netdev_vdbg(bp->dev, "macb_tx_error_task: q = %u, t = %u, h = %u\n",
+	netdev_vdbg(bp->netdev, "macb_tx_error_task: q = %u, t = %u, h = %u\n",
 		    queue_index, queue->tx_tail, queue->tx_head);
 
 	/* Prevent the queue NAPI TX poll from running, as it calls
@@ -1281,14 +1281,14 @@ static void macb_tx_error_task(struct work_struct *work)
 	spin_lock_irqsave(&bp->lock, flags);
 
 	/* Make sure nobody is trying to queue up new packets */
-	netif_tx_stop_all_queues(bp->dev);
+	netif_tx_stop_all_queues(bp->netdev);
 
 	/* Stop transmission now
 	 * (in case we have just queued new packets)
 	 * macb/gem must be halted to write TBQP register
 	 */
 	if (macb_halt_tx(bp)) {
-		netdev_err(bp->dev, "BUG: halt tx timed out\n");
+		netdev_err(bp->netdev, "BUG: halt tx timed out\n");
 		macb_writel(bp, NCR, macb_readl(bp, NCR) & (~MACB_BIT(TE)));
 		halt_timeout = true;
 	}
@@ -1317,13 +1317,13 @@ static void macb_tx_error_task(struct work_struct *work)
 			 * since it's the only one written back by the hardware
 			 */
 			if (!(ctrl & MACB_BIT(TX_BUF_EXHAUSTED))) {
-				netdev_vdbg(bp->dev, "txerr skb %u (data %p) TX complete\n",
+				netdev_vdbg(bp->netdev, "txerr skb %u (data %p) TX complete\n",
 					    macb_tx_ring_wrap(bp, tail),
 					    skb->data);
-				bp->dev->stats.tx_packets++;
+				bp->netdev->stats.tx_packets++;
 				queue->stats.tx_packets++;
 				packets++;
-				bp->dev->stats.tx_bytes += skb->len;
+				bp->netdev->stats.tx_bytes += skb->len;
 				queue->stats.tx_bytes += skb->len;
 				bytes += skb->len;
 			}
@@ -1333,7 +1333,7 @@ static void macb_tx_error_task(struct work_struct *work)
 			 * those. Statistics are updated by hardware.
 			 */
 			if (ctrl & MACB_BIT(TX_BUF_EXHAUSTED))
-				netdev_err(bp->dev,
+				netdev_err(bp->netdev,
 					   "BUG: TX buffers exhausted mid-frame\n");
 
 			desc->ctrl = ctrl | MACB_BIT(TX_USED);
@@ -1342,7 +1342,7 @@ static void macb_tx_error_task(struct work_struct *work)
 		macb_tx_unmap(bp, tx_skb, 0);
 	}
 
-	netdev_tx_completed_queue(netdev_get_tx_queue(bp->dev, queue_index),
+	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, queue_index),
 				  packets, bytes);
 
 	/* Set end of TX queue */
@@ -1367,7 +1367,7 @@ static void macb_tx_error_task(struct work_struct *work)
 		macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TE));
 
 	/* Now we are ready to start transmission again */
-	netif_tx_start_all_queues(bp->dev);
+	netif_tx_start_all_queues(bp->netdev);
 	macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
 
 	spin_unlock_irqrestore(&bp->lock, flags);
@@ -1446,12 +1446,12 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 				    !ptp_one_step_sync(skb))
 					gem_ptp_do_txstamp(bp, skb, desc);
 
-				netdev_vdbg(bp->dev, "skb %u (data %p) TX complete\n",
+				netdev_vdbg(bp->netdev, "skb %u (data %p) TX complete\n",
 					    macb_tx_ring_wrap(bp, tail),
 					    skb->data);
-				bp->dev->stats.tx_packets++;
+				bp->netdev->stats.tx_packets++;
 				queue->stats.tx_packets++;
-				bp->dev->stats.tx_bytes += skb->len;
+				bp->netdev->stats.tx_bytes += skb->len;
 				queue->stats.tx_bytes += skb->len;
 				packets++;
 				bytes += skb->len;
@@ -1469,14 +1469,14 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 		}
 	}
 
-	netdev_tx_completed_queue(netdev_get_tx_queue(bp->dev, queue_index),
+	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, queue_index),
 				  packets, bytes);
 
 	queue->tx_tail = tail;
-	if (__netif_subqueue_stopped(bp->dev, queue_index) &&
+	if (__netif_subqueue_stopped(bp->netdev, queue_index) &&
 	    CIRC_CNT(queue->tx_head, queue->tx_tail,
 		     bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
-		netif_wake_subqueue(bp->dev, queue_index);
+		netif_wake_subqueue(bp->netdev, queue_index);
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
 
 	if (packets)
@@ -1504,9 +1504,9 @@ static void gem_rx_refill(struct macb_queue *queue)
 
 		if (!queue->rx_skbuff[entry]) {
 			/* allocate sk_buff for this free entry in ring */
-			skb = netdev_alloc_skb(bp->dev, bp->rx_buffer_size);
+			skb = netdev_alloc_skb(bp->netdev, bp->rx_buffer_size);
 			if (unlikely(!skb)) {
-				netdev_err(bp->dev,
+				netdev_err(bp->netdev,
 					   "Unable to allocate sk_buff\n");
 				break;
 			}
@@ -1555,8 +1555,8 @@ static void gem_rx_refill(struct macb_queue *queue)
 	/* Make descriptor updates visible to hardware */
 	wmb();
 
-	netdev_vdbg(bp->dev, "rx ring: queue: %p, prepared head %d, tail %d\n",
-			queue, queue->rx_prepared_head, queue->rx_tail);
+	netdev_vdbg(bp->netdev, "rx ring: queue: %p, prepared head %d, tail %d\n",
+		    queue, queue->rx_prepared_head, queue->rx_tail);
 }
 
 /* Mark DMA descriptors from begin up to and not including end as unused */
@@ -1616,17 +1616,17 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		count++;
 
 		if (!(ctrl & MACB_BIT(RX_SOF) && ctrl & MACB_BIT(RX_EOF))) {
-			netdev_err(bp->dev,
+			netdev_err(bp->netdev,
 				   "not whole frame pointed by descriptor\n");
-			bp->dev->stats.rx_dropped++;
+			bp->netdev->stats.rx_dropped++;
 			queue->stats.rx_dropped++;
 			break;
 		}
 		skb = queue->rx_skbuff[entry];
 		if (unlikely(!skb)) {
-			netdev_err(bp->dev,
+			netdev_err(bp->netdev,
 				   "inconsistent Rx descriptor chain\n");
-			bp->dev->stats.rx_dropped++;
+			bp->netdev->stats.rx_dropped++;
 			queue->stats.rx_dropped++;
 			break;
 		}
@@ -1634,28 +1634,28 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		queue->rx_skbuff[entry] = NULL;
 		len = ctrl & bp->rx_frm_len_mask;
 
-		netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len);
+		netdev_vdbg(bp->netdev, "gem_rx %u (len %u)\n", entry, len);
 
 		skb_put(skb, len);
 		dma_unmap_single(&bp->pdev->dev, addr,
 				 bp->rx_buffer_size, DMA_FROM_DEVICE);
 
-		skb->protocol = eth_type_trans(skb, bp->dev);
+		skb->protocol = eth_type_trans(skb, bp->netdev);
 		skb_checksum_none_assert(skb);
-		if (bp->dev->features & NETIF_F_RXCSUM &&
-		    !(bp->dev->flags & IFF_PROMISC) &&
+		if (bp->netdev->features & NETIF_F_RXCSUM &&
+		    !(bp->netdev->flags & IFF_PROMISC) &&
 		    GEM_BFEXT(RX_CSUM, ctrl) & GEM_RX_CSUM_CHECKED_MASK)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-		bp->dev->stats.rx_packets++;
+		bp->netdev->stats.rx_packets++;
 		queue->stats.rx_packets++;
-		bp->dev->stats.rx_bytes += skb->len;
+		bp->netdev->stats.rx_bytes += skb->len;
 		queue->stats.rx_bytes += skb->len;
 
 		gem_ptp_do_rxstamp(bp, skb, desc);
 
 #if defined(DEBUG) && defined(VERBOSE_DEBUG)
-		netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n",
+		netdev_vdbg(bp->netdev, "received skb of length %u, csum: %08x\n",
 			    skb->len, skb->csum);
 		print_hex_dump(KERN_DEBUG, " mac: ", DUMP_PREFIX_ADDRESS, 16, 1,
 			       skb_mac_header(skb), 16, true);
@@ -1684,9 +1684,9 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 	desc = macb_rx_desc(queue, last_frag);
 	len = desc->ctrl & bp->rx_frm_len_mask;
 
-	netdev_vdbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n",
-		macb_rx_ring_wrap(bp, first_frag),
-		macb_rx_ring_wrap(bp, last_frag), len);
+	netdev_vdbg(bp->netdev, "macb_rx_frame frags %u - %u (len %u)\n",
+		    macb_rx_ring_wrap(bp, first_frag),
+		    macb_rx_ring_wrap(bp, last_frag), len);
 
 	/* The ethernet header starts NET_IP_ALIGN bytes into the
 	 * first buffer. Since the header is 14 bytes, this makes the
@@ -1696,9 +1696,9 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 	 * the two padding bytes into the skb so that we avoid hitting
 	 * the slowpath in memcpy(), and pull them off afterwards.
 	 */
-	skb = netdev_alloc_skb(bp->dev, len + NET_IP_ALIGN);
+	skb = netdev_alloc_skb(bp->netdev, len + NET_IP_ALIGN);
 	if (!skb) {
-		bp->dev->stats.rx_dropped++;
+		bp->netdev->stats.rx_dropped++;
 		for (frag = first_frag; ; frag++) {
 			desc = macb_rx_desc(queue, frag);
 			desc->addr &= ~MACB_BIT(RX_USED);
@@ -1742,11 +1742,11 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 	wmb();
 
 	__skb_pull(skb, NET_IP_ALIGN);
-	skb->protocol = eth_type_trans(skb, bp->dev);
+	skb->protocol = eth_type_trans(skb, bp->netdev);
 
-	bp->dev->stats.rx_packets++;
-	bp->dev->stats.rx_bytes += skb->len;
-	netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n",
+	bp->netdev->stats.rx_packets++;
+	bp->netdev->stats.rx_bytes += skb->len;
+	netdev_vdbg(bp->netdev, "received skb of length %u, csum: %08x\n",
 		    skb->len, skb->csum);
 	napi_gro_receive(napi, skb);
 
@@ -1826,7 +1826,7 @@ static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 		unsigned long flags;
 		u32 ctrl;
 
-		netdev_err(bp->dev, "RX queue corruption: reset it\n");
+		netdev_err(bp->netdev, "RX queue corruption: reset it\n");
 
 		spin_lock_irqsave(&bp->lock, flags);
 
@@ -1873,7 +1873,7 @@ static int macb_rx_poll(struct napi_struct *napi, int budget)
 
 	work_done = bp->macbgem_ops.mog_rx(queue, napi, budget);
 
-	netdev_vdbg(bp->dev, "RX poll: queue = %u, work_done = %d, budget = %d\n",
+	netdev_vdbg(bp->netdev, "RX poll: queue = %u, work_done = %d, budget = %d\n",
 		    (unsigned int)(queue - bp->queues), work_done, budget);
 
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -1892,7 +1892,7 @@ static int macb_rx_poll(struct napi_struct *napi, int budget)
 		if (macb_rx_pending(queue)) {
 			queue_writel(queue, IDR, bp->rx_intr_mask);
 			macb_queue_isr_clear(bp, queue, MACB_BIT(RCOMP));
-			netdev_vdbg(bp->dev, "poll: packets pending, reschedule\n");
+			netdev_vdbg(bp->netdev, "poll: packets pending, reschedule\n");
 			napi_schedule(napi);
 		}
 	}
@@ -1956,11 +1956,11 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
 	rmb(); // ensure txubr_pending is up to date
 	if (queue->txubr_pending) {
 		queue->txubr_pending = false;
-		netdev_vdbg(bp->dev, "poll: tx restart\n");
+		netdev_vdbg(bp->netdev, "poll: tx restart\n");
 		macb_tx_restart(queue);
 	}
 
-	netdev_vdbg(bp->dev, "TX poll: queue = %u, work_done = %d, budget = %d\n",
+	netdev_vdbg(bp->netdev, "TX poll: queue = %u, work_done = %d, budget = %d\n",
 		    (unsigned int)(queue - bp->queues), work_done, budget);
 
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -1979,7 +1979,7 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
 		if (macb_tx_complete_pending(queue)) {
 			queue_writel(queue, IDR, MACB_BIT(TCOMP));
 			macb_queue_isr_clear(bp, queue, MACB_BIT(TCOMP));
-			netdev_vdbg(bp->dev, "TX poll: packets pending, reschedule\n");
+			netdev_vdbg(bp->netdev, "TX poll: packets pending, reschedule\n");
 			napi_schedule(napi);
 		}
 	}
@@ -1990,7 +1990,7 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
 static void macb_hresp_error_task(struct work_struct *work)
 {
 	struct macb *bp = from_work(bp, work, hresp_err_bh_work);
-	struct net_device *dev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	struct macb_queue *queue;
 	unsigned int q;
 	u32 ctrl;
@@ -2004,8 +2004,8 @@ static void macb_hresp_error_task(struct work_struct *work)
 	ctrl &= ~(MACB_BIT(RE) | MACB_BIT(TE));
 	macb_writel(bp, NCR, ctrl);
 
-	netif_tx_stop_all_queues(dev);
-	netif_carrier_off(dev);
+	netif_tx_stop_all_queues(netdev);
+	netif_carrier_off(netdev);
 
 	bp->macbgem_ops.mog_init_rings(bp);
 
@@ -2022,8 +2022,8 @@ static void macb_hresp_error_task(struct work_struct *work)
 	ctrl |= MACB_BIT(RE) | MACB_BIT(TE);
 	macb_writel(bp, NCR, ctrl);
 
-	netif_carrier_on(dev);
-	netif_tx_start_all_queues(dev);
+	netif_carrier_on(netdev);
+	netif_tx_start_all_queues(netdev);
 }
 
 static void macb_wol_interrupt(struct macb_queue *queue, u32 status)
@@ -2032,7 +2032,7 @@ static void macb_wol_interrupt(struct macb_queue *queue, u32 status)
 
 	queue_writel(queue, IDR, MACB_BIT(WOL));
 	macb_writel(bp, WOL, 0);
-	netdev_vdbg(bp->dev, "MACB WoL: queue = %u, isr = 0x%08lx\n",
+	netdev_vdbg(bp->netdev, "MACB WoL: queue = %u, isr = 0x%08lx\n",
 		    (unsigned int)(queue - bp->queues),
 		    (unsigned long)status);
 	macb_queue_isr_clear(bp, queue, MACB_BIT(WOL));
@@ -2045,7 +2045,7 @@ static void gem_wol_interrupt(struct macb_queue *queue, u32 status)
 
 	queue_writel(queue, IDR, GEM_BIT(WOL));
 	gem_writel(bp, WOL, 0);
-	netdev_vdbg(bp->dev, "GEM WoL: queue = %u, isr = 0x%08lx\n",
+	netdev_vdbg(bp->netdev, "GEM WoL: queue = %u, isr = 0x%08lx\n",
 		    (unsigned int)(queue - bp->queues),
 		    (unsigned long)status);
 	macb_queue_isr_clear(bp, queue, GEM_BIT(WOL));
@@ -2055,10 +2055,10 @@ static void gem_wol_interrupt(struct macb_queue *queue, u32 status)
 static int macb_interrupt_misc(struct macb_queue *queue, u32 status)
 {
 	struct macb *bp = queue->bp;
-	struct net_device *dev;
+	struct net_device *netdev;
 	u32 ctrl;
 
-	dev = bp->dev;
+	netdev = bp->netdev;
 
 	if (unlikely(status & (MACB_TX_ERR_FLAGS))) {
 		queue_writel(queue, IDR, MACB_TX_INT_FLAGS);
@@ -2099,7 +2099,7 @@ static int macb_interrupt_misc(struct macb_queue *queue, u32 status)
 
 	if (status & MACB_BIT(HRESP)) {
 		queue_work(system_bh_wq, &bp->hresp_err_bh_work);
-		netdev_err(dev, "DMA bus error: HRESP not OK\n");
+		netdev_err(netdev, "DMA bus error: HRESP not OK\n");
 		macb_queue_isr_clear(bp, queue, MACB_BIT(HRESP));
 	}
 
@@ -2118,7 +2118,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 {
 	struct macb_queue *queue = dev_id;
 	struct macb *bp = queue->bp;
-	struct net_device *dev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	u32 status;
 
 	status = queue_readl(queue, ISR);
@@ -2130,13 +2130,13 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 
 	while (status) {
 		/* close possible race with dev_close */
-		if (unlikely(!netif_running(dev))) {
+		if (unlikely(!netif_running(netdev))) {
 			queue_writel(queue, IDR, -1);
 			macb_queue_isr_clear(bp, queue, -1);
 			break;
 		}
 
-		netdev_vdbg(bp->dev, "queue = %u, isr = 0x%08lx\n",
+		netdev_vdbg(netdev, "queue = %u, isr = 0x%08lx\n",
 			    (unsigned int)(queue - bp->queues),
 			    (unsigned long)status);
 
@@ -2181,16 +2181,16 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 /* Polling receive - used by netconsole and other diagnostic tools
  * to allow network i/o with interrupts disabled.
  */
-static void macb_poll_controller(struct net_device *dev)
+static void macb_poll_controller(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned long flags;
 	unsigned int q;
 
 	local_irq_save(flags);
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
-		macb_interrupt(dev->irq, queue);
+		macb_interrupt(netdev->irq, queue);
 	local_irq_restore(flags);
 }
 #endif
@@ -2277,7 +2277,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 
 	/* Should never happen */
 	if (unlikely(!tx_skb)) {
-		netdev_err(bp->dev, "BUG! empty skb!\n");
+		netdev_err(bp->netdev, "BUG! empty skb!\n");
 		return 0;
 	}
 
@@ -2328,7 +2328,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 		if (i == queue->tx_head) {
 			ctrl |= MACB_BF(TX_LSO, lso_ctrl);
 			ctrl |= MACB_BF(TX_TCP_SEQ_SRC, seq_ctrl);
-			if ((bp->dev->features & NETIF_F_HW_CSUM) &&
+			if ((bp->netdev->features & NETIF_F_HW_CSUM) &&
 			    skb->ip_summed != CHECKSUM_PARTIAL && !lso_ctrl &&
 			    !ptp_one_step_sync(skb))
 				ctrl |= MACB_BIT(TX_NOCRC);
@@ -2352,7 +2352,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 	return 0;
 
 dma_error:
-	netdev_err(bp->dev, "TX DMA map failed\n");
+	netdev_err(bp->netdev, "TX DMA map failed\n");
 
 	for (i = queue->tx_head; i != tx_head; i++) {
 		tx_skb = macb_tx_skb(queue, i);
@@ -2364,7 +2364,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 }
 
 static netdev_features_t macb_features_check(struct sk_buff *skb,
-					     struct net_device *dev,
+					     struct net_device *netdev,
 					     netdev_features_t features)
 {
 	unsigned int nr_frags, f;
@@ -2416,7 +2416,7 @@ static inline int macb_clear_csum(struct sk_buff *skb)
 	return 0;
 }
 
-static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
+static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *netdev)
 {
 	bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb) ||
 		      skb_is_nonlinear(*skb);
@@ -2425,7 +2425,7 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
 	struct sk_buff *nskb;
 	u32 fcs;
 
-	if (!(ndev->features & NETIF_F_HW_CSUM) ||
+	if (!(netdev->features & NETIF_F_HW_CSUM) ||
 	    !((*skb)->ip_summed != CHECKSUM_PARTIAL) ||
 	    skb_shinfo(*skb)->gso_size || ptp_one_step_sync(*skb))
 		return 0;
@@ -2467,10 +2467,11 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
 	return 0;
 }
 
-static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
+				   struct net_device *netdev)
 {
 	u16 queue_index = skb_get_queue_mapping(skb);
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue = &bp->queues[queue_index];
 	unsigned int desc_cnt, nr_frags, frag_size, f;
 	unsigned int hdrlen;
@@ -2483,7 +2484,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		return ret;
 	}
 
-	if (macb_pad_and_fcs(&skb, dev)) {
+	if (macb_pad_and_fcs(&skb, netdev)) {
 		dev_kfree_skb_any(skb);
 		return ret;
 	}
@@ -2502,7 +2503,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		else
 			hdrlen = skb_tcp_all_headers(skb);
 		if (skb_headlen(skb) < hdrlen) {
-			netdev_err(bp->dev, "Error - LSO headers fragmented!!!\n");
+			netdev_err(bp->netdev, "Error - LSO headers fragmented!!!\n");
 			/* if this is required, would need to copy to single buffer */
 			return NETDEV_TX_BUSY;
 		}
@@ -2510,7 +2511,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		hdrlen = umin(skb_headlen(skb), bp->max_tx_length);
 
 #if defined(DEBUG) && defined(VERBOSE_DEBUG)
-	netdev_vdbg(bp->dev,
+	netdev_vdbg(bp->netdev,
 		    "start_xmit: queue %hu len %u head %p data %p tail %p end %p\n",
 		    queue_index, skb->len, skb->head, skb->data,
 		    skb_tail_pointer(skb), skb_end_pointer(skb));
@@ -2538,8 +2539,8 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* This is a hard error, log it. */
 	if (CIRC_SPACE(queue->tx_head, queue->tx_tail,
 		       bp->tx_ring_size) < desc_cnt) {
-		netif_stop_subqueue(dev, queue_index);
-		netdev_dbg(bp->dev, "tx_head = %u, tx_tail = %u\n",
+		netif_stop_subqueue(netdev, queue_index);
+		netdev_dbg(netdev, "tx_head = %u, tx_tail = %u\n",
 			   queue->tx_head, queue->tx_tail);
 		ret = NETDEV_TX_BUSY;
 		goto unlock;
@@ -2554,7 +2555,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Make newly initialized descriptor visible to hardware */
 	wmb();
 	skb_tx_timestamp(skb);
-	netdev_tx_sent_queue(netdev_get_tx_queue(bp->dev, queue_index),
+	netdev_tx_sent_queue(netdev_get_tx_queue(bp->netdev, queue_index),
 			     skb->len);
 
 	spin_lock(&bp->lock);
@@ -2563,7 +2564,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	spin_unlock(&bp->lock);
 
 	if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1)
-		netif_stop_subqueue(dev, queue_index);
+		netif_stop_subqueue(netdev, queue_index);
 
 unlock:
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
@@ -2579,7 +2580,7 @@ static void macb_init_rx_buffer_size(struct macb *bp, size_t size)
 		bp->rx_buffer_size = MIN(size, RX_BUFFER_MAX);
 
 		if (bp->rx_buffer_size % RX_BUFFER_MULTIPLE) {
-			netdev_dbg(bp->dev,
+			netdev_dbg(bp->netdev,
 				   "RX buffer must be multiple of %d bytes, expanding\n",
 				   RX_BUFFER_MULTIPLE);
 			bp->rx_buffer_size =
@@ -2587,8 +2588,8 @@ static void macb_init_rx_buffer_size(struct macb *bp, size_t size)
 		}
 	}
 
-	netdev_dbg(bp->dev, "mtu [%u] rx_buffer_size [%zu]\n",
-		   bp->dev->mtu, bp->rx_buffer_size);
+	netdev_dbg(bp->netdev, "mtu [%u] rx_buffer_size [%zu]\n",
+		   bp->netdev->mtu, bp->rx_buffer_size);
 }
 
 static void gem_free_rx_buffers(struct macb *bp)
@@ -2687,7 +2688,7 @@ static int gem_alloc_rx_buffers(struct macb *bp)
 		if (!queue->rx_skbuff)
 			return -ENOMEM;
 		else
-			netdev_dbg(bp->dev,
+			netdev_dbg(bp->netdev,
 				   "Allocated %d RX struct sk_buff entries at %p\n",
 				   bp->rx_ring_size, queue->rx_skbuff);
 	}
@@ -2705,7 +2706,7 @@ static int macb_alloc_rx_buffers(struct macb *bp)
 	if (!queue->rx_buffers)
 		return -ENOMEM;
 
-	netdev_dbg(bp->dev,
+	netdev_dbg(bp->netdev,
 		   "Allocated RX buffers of %d bytes at %08lx (mapped %p)\n",
 		   size, (unsigned long)queue->rx_buffers_dma, queue->rx_buffers);
 	return 0;
@@ -2731,14 +2732,14 @@ static int macb_alloc_consistent(struct macb *bp)
 	tx = dma_alloc_coherent(dev, size, &tx_dma, GFP_KERNEL);
 	if (!tx || upper_32_bits(tx_dma) != upper_32_bits(tx_dma + size - 1))
 		goto out_err;
-	netdev_dbg(bp->dev, "Allocated %zu bytes for %u TX rings at %08lx (mapped %p)\n",
+	netdev_dbg(bp->netdev, "Allocated %zu bytes for %u TX rings at %08lx (mapped %p)\n",
 		   size, bp->num_queues, (unsigned long)tx_dma, tx);
 
 	size = bp->num_queues * macb_rx_ring_size_per_queue(bp);
 	rx = dma_alloc_coherent(dev, size, &rx_dma, GFP_KERNEL);
 	if (!rx || upper_32_bits(rx_dma) != upper_32_bits(rx_dma + size - 1))
 		goto out_err;
-	netdev_dbg(bp->dev, "Allocated %zu bytes for %u RX rings at %08lx (mapped %p)\n",
+	netdev_dbg(bp->netdev, "Allocated %zu bytes for %u RX rings at %08lx (mapped %p)\n",
 		   size, bp->num_queues, (unsigned long)rx_dma, rx);
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
@@ -2966,7 +2967,7 @@ static void macb_configure_dma(struct macb *bp)
 		else
 			dmacfg |= GEM_BIT(ENDIA_DESC); /* CPU in big endian */
 
-		if (bp->dev->features & NETIF_F_HW_CSUM)
+		if (bp->netdev->features & NETIF_F_HW_CSUM)
 			dmacfg |= GEM_BIT(TXCOEN);
 		else
 			dmacfg &= ~GEM_BIT(TXCOEN);
@@ -2976,7 +2977,7 @@ static void macb_configure_dma(struct macb *bp)
 			dmacfg |= GEM_BIT(ADDR64);
 		if (macb_dma_ptp(bp))
 			dmacfg |= GEM_BIT(RXEXT) | GEM_BIT(TXEXT);
-		netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n",
+		netdev_dbg(bp->netdev, "Cadence configure DMA with 0x%08x\n",
 			   dmacfg);
 		gem_writel(bp, DMACFG, dmacfg);
 	}
@@ -3000,11 +3001,11 @@ static void macb_init_hw(struct macb *bp)
 		config |= MACB_BIT(JFRAME);	/* Enable jumbo frames */
 	else
 		config |= MACB_BIT(BIG);	/* Receive oversized frames */
-	if (bp->dev->flags & IFF_PROMISC)
+	if (bp->netdev->flags & IFF_PROMISC)
 		config |= MACB_BIT(CAF);	/* Copy All Frames */
-	else if (macb_is_gem(bp) && bp->dev->features & NETIF_F_RXCSUM)
+	else if (macb_is_gem(bp) && bp->netdev->features & NETIF_F_RXCSUM)
 		config |= GEM_BIT(RXCOEN);
-	if (!(bp->dev->flags & IFF_BROADCAST))
+	if (!(bp->netdev->flags & IFF_BROADCAST))
 		config |= MACB_BIT(NBC);	/* No BroadCast */
 	config |= macb_dbw(bp);
 	macb_writel(bp, NCFGR, config);
@@ -3078,17 +3079,17 @@ static int hash_get_index(__u8 *addr)
 }
 
 /* Add multicast addresses to the internal multicast-hash table. */
-static void macb_sethashtable(struct net_device *dev)
+static void macb_sethashtable(struct net_device *netdev)
 {
 	struct netdev_hw_addr *ha;
 	unsigned long mc_filter[2];
 	unsigned int bitnr;
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	mc_filter[0] = 0;
 	mc_filter[1] = 0;
 
-	netdev_for_each_mc_addr(ha, dev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		bitnr = hash_get_index(ha->addr);
 		mc_filter[bitnr >> 5] |= 1 << (bitnr & 31);
 	}
@@ -3098,14 +3099,14 @@ static void macb_sethashtable(struct net_device *dev)
 }
 
 /* Enable/Disable promiscuous and multicast modes. */
-static void macb_set_rx_mode(struct net_device *dev)
+static void macb_set_rx_mode(struct net_device *netdev)
 {
 	unsigned long cfg;
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	cfg = macb_readl(bp, NCFGR);
 
-	if (dev->flags & IFF_PROMISC) {
+	if (netdev->flags & IFF_PROMISC) {
 		/* Enable promiscuous mode */
 		cfg |= MACB_BIT(CAF);
 
@@ -3117,20 +3118,20 @@ static void macb_set_rx_mode(struct net_device *dev)
 		cfg &= ~MACB_BIT(CAF);
 
 		/* Enable RX checksum offload only if requested */
-		if (macb_is_gem(bp) && dev->features & NETIF_F_RXCSUM)
+		if (macb_is_gem(bp) && netdev->features & NETIF_F_RXCSUM)
 			cfg |= GEM_BIT(RXCOEN);
 	}
 
-	if (dev->flags & IFF_ALLMULTI) {
+	if (netdev->flags & IFF_ALLMULTI) {
 		/* Enable all multicast mode */
 		macb_or_gem_writel(bp, HRB, -1);
 		macb_or_gem_writel(bp, HRT, -1);
 		cfg |= MACB_BIT(NCFGR_MTI);
-	} else if (!netdev_mc_empty(dev)) {
+	} else if (!netdev_mc_empty(netdev)) {
 		/* Enable specific multicasts */
-		macb_sethashtable(dev);
+		macb_sethashtable(netdev);
 		cfg |= MACB_BIT(NCFGR_MTI);
-	} else if (dev->flags & (~IFF_ALLMULTI)) {
+	} else if (netdev->flags & (~IFF_ALLMULTI)) {
 		/* Disable all multicast mode */
 		macb_or_gem_writel(bp, HRB, 0);
 		macb_or_gem_writel(bp, HRT, 0);
@@ -3140,15 +3141,15 @@ static void macb_set_rx_mode(struct net_device *dev)
 	macb_writel(bp, NCFGR, cfg);
 }
 
-static int macb_open(struct net_device *dev)
+static int macb_open(struct net_device *netdev)
 {
-	size_t bufsz = dev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN;
-	struct macb *bp = netdev_priv(dev);
+	size_t bufsz = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN;
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned int q;
 	int err;
 
-	netdev_dbg(bp->dev, "open\n");
+	netdev_dbg(bp->netdev, "open\n");
 
 	err = pm_runtime_resume_and_get(&bp->pdev->dev);
 	if (err < 0)
@@ -3159,7 +3160,7 @@ static int macb_open(struct net_device *dev)
 
 	err = macb_alloc_consistent(bp);
 	if (err) {
-		netdev_err(dev, "Unable to allocate DMA memory (error %d)\n",
+		netdev_err(netdev, "Unable to allocate DMA memory (error %d)\n",
 			   err);
 		goto pm_exit;
 	}
@@ -3186,10 +3187,10 @@ static int macb_open(struct net_device *dev)
 	if (err)
 		goto phy_off;
 
-	netif_tx_start_all_queues(dev);
+	netif_tx_start_all_queues(netdev);
 
 	if (bp->ptp_info)
-		bp->ptp_info->ptp_init(dev);
+		bp->ptp_info->ptp_init(netdev);
 
 	return 0;
 
@@ -3208,19 +3209,19 @@ static int macb_open(struct net_device *dev)
 	return err;
 }
 
-static int macb_close(struct net_device *dev)
+static int macb_close(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned long flags;
 	unsigned int q;
 
-	netif_tx_stop_all_queues(dev);
+	netif_tx_stop_all_queues(netdev);
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
 		napi_disable(&queue->napi_rx);
 		napi_disable(&queue->napi_tx);
-		netdev_tx_reset_queue(netdev_get_tx_queue(dev, q));
+		netdev_tx_reset_queue(netdev_get_tx_queue(netdev, q));
 	}
 
 	cancel_delayed_work_sync(&bp->tx_lpi_work);
@@ -3232,38 +3233,38 @@ static int macb_close(struct net_device *dev)
 
 	spin_lock_irqsave(&bp->lock, flags);
 	macb_reset_hw(bp);
-	netif_carrier_off(dev);
+	netif_carrier_off(netdev);
 	spin_unlock_irqrestore(&bp->lock, flags);
 
 	macb_free_consistent(bp);
 
 	if (bp->ptp_info)
-		bp->ptp_info->ptp_remove(dev);
+		bp->ptp_info->ptp_remove(netdev);
 
 	pm_runtime_put(&bp->pdev->dev);
 
 	return 0;
 }
 
-static int macb_change_mtu(struct net_device *dev, int new_mtu)
+static int macb_change_mtu(struct net_device *netdev, int new_mtu)
 {
-	if (netif_running(dev))
+	if (netif_running(netdev))
 		return -EBUSY;
 
-	WRITE_ONCE(dev->mtu, new_mtu);
+	WRITE_ONCE(netdev->mtu, new_mtu);
 
 	return 0;
 }
 
-static int macb_set_mac_addr(struct net_device *dev, void *addr)
+static int macb_set_mac_addr(struct net_device *netdev, void *addr)
 {
 	int err;
 
-	err = eth_mac_addr(dev, addr);
+	err = eth_mac_addr(netdev, addr);
 	if (err < 0)
 		return err;
 
-	macb_set_hwaddr(netdev_priv(dev));
+	macb_set_hwaddr(netdev_priv(netdev));
 	return 0;
 }
 
@@ -3301,7 +3302,7 @@ static void gem_get_stats(struct macb *bp, struct rtnl_link_stats64 *nstat)
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 
 	spin_lock_irq(&bp->stats_lock);
-	if (netif_running(bp->dev))
+	if (netif_running(bp->netdev))
 		gem_update_stats(bp);
 
 	nstat->rx_errors = (hwstat->rx_frame_check_sequence_errors +
@@ -3334,10 +3335,10 @@ static void gem_get_stats(struct macb *bp, struct rtnl_link_stats64 *nstat)
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void gem_get_ethtool_stats(struct net_device *dev,
+static void gem_get_ethtool_stats(struct net_device *netdev,
 				  struct ethtool_stats *stats, u64 *data)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	spin_lock_irq(&bp->stats_lock);
 	gem_update_stats(bp);
@@ -3346,9 +3347,9 @@ static void gem_get_ethtool_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static int gem_get_sset_count(struct net_device *dev, int sset)
+static int gem_get_sset_count(struct net_device *netdev, int sset)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	switch (sset) {
 	case ETH_SS_STATS:
@@ -3358,9 +3359,9 @@ static int gem_get_sset_count(struct net_device *dev, int sset)
 	}
 }
 
-static void gem_get_ethtool_strings(struct net_device *dev, u32 sset, u8 *p)
+static void gem_get_ethtool_strings(struct net_device *netdev, u32 sset, u8 *p)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned int i;
 	unsigned int q;
@@ -3379,13 +3380,13 @@ static void gem_get_ethtool_strings(struct net_device *dev, u32 sset, u8 *p)
 	}
 }
 
-static void macb_get_stats(struct net_device *dev,
+static void macb_get_stats(struct net_device *netdev,
 			   struct rtnl_link_stats64 *nstat)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
-	netdev_stats_to_stats64(nstat, &bp->dev->stats);
+	netdev_stats_to_stats64(nstat, &bp->netdev->stats);
 	if (macb_is_gem(bp)) {
 		gem_get_stats(bp, nstat);
 		return;
@@ -3429,10 +3430,10 @@ static void macb_get_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void macb_get_pause_stats(struct net_device *dev,
+static void macb_get_pause_stats(struct net_device *netdev,
 				 struct ethtool_pause_stats *pause_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3442,10 +3443,10 @@ static void macb_get_pause_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void gem_get_pause_stats(struct net_device *dev,
+static void gem_get_pause_stats(struct net_device *netdev,
 				struct ethtool_pause_stats *pause_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3455,10 +3456,10 @@ static void gem_get_pause_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void macb_get_eth_mac_stats(struct net_device *dev,
+static void macb_get_eth_mac_stats(struct net_device *netdev,
 				   struct ethtool_eth_mac_stats *mac_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3480,10 +3481,10 @@ static void macb_get_eth_mac_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void gem_get_eth_mac_stats(struct net_device *dev,
+static void gem_get_eth_mac_stats(struct net_device *netdev,
 				  struct ethtool_eth_mac_stats *mac_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3513,10 +3514,10 @@ static void gem_get_eth_mac_stats(struct net_device *dev,
 }
 
 /* TODO: Report SQE test errors when added to phy_stats */
-static void macb_get_eth_phy_stats(struct net_device *dev,
+static void macb_get_eth_phy_stats(struct net_device *netdev,
 				   struct ethtool_eth_phy_stats *phy_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3525,10 +3526,10 @@ static void macb_get_eth_phy_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void gem_get_eth_phy_stats(struct net_device *dev,
+static void gem_get_eth_phy_stats(struct net_device *netdev,
 				  struct ethtool_eth_phy_stats *phy_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3537,11 +3538,11 @@ static void gem_get_eth_phy_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void macb_get_rmon_stats(struct net_device *dev,
+static void macb_get_rmon_stats(struct net_device *netdev,
 				struct ethtool_rmon_stats *rmon_stats,
 				const struct ethtool_rmon_hist_range **ranges)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3563,11 +3564,11 @@ static const struct ethtool_rmon_hist_range gem_rmon_ranges[] = {
 	{ },
 };
 
-static void gem_get_rmon_stats(struct net_device *dev,
+static void gem_get_rmon_stats(struct net_device *netdev,
 			       struct ethtool_rmon_stats *rmon_stats,
 			       const struct ethtool_rmon_hist_range **ranges)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3598,10 +3599,10 @@ static int macb_get_regs_len(struct net_device *netdev)
 	return MACB_GREGS_NBR * sizeof(u32);
 }
 
-static void macb_get_regs(struct net_device *dev, struct ethtool_regs *regs,
+static void macb_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
 			  void *p)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	unsigned int tail, head;
 	u32 *regs_buff = p;
 
@@ -3718,16 +3719,16 @@ static int macb_set_ringparam(struct net_device *netdev,
 		return 0;
 	}
 
-	if (netif_running(bp->dev)) {
+	if (netif_running(bp->netdev)) {
 		reset = 1;
-		macb_close(bp->dev);
+		macb_close(bp->netdev);
 	}
 
 	bp->rx_ring_size = new_rx_size;
 	bp->tx_ring_size = new_tx_size;
 
 	if (reset)
-		macb_open(bp->dev);
+		macb_open(bp->netdev);
 
 	return 0;
 }
@@ -3754,13 +3755,13 @@ static s32 gem_get_ptp_max_adj(void)
 	return 64000000;
 }
 
-static int gem_get_ts_info(struct net_device *dev,
+static int gem_get_ts_info(struct net_device *netdev,
 			   struct kernel_ethtool_ts_info *info)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	if (!macb_dma_ptp(bp)) {
-		ethtool_op_get_ts_info(dev, info);
+		ethtool_op_get_ts_info(netdev, info);
 		return 0;
 	}
 
@@ -3807,7 +3808,7 @@ static int macb_get_ts_info(struct net_device *netdev,
 
 static void gem_enable_flow_filters(struct macb *bp, bool enable)
 {
-	struct net_device *netdev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	struct ethtool_rx_fs_item *item;
 	u32 t2_scr;
 	int num_t2_scr;
@@ -4137,16 +4138,16 @@ static const struct ethtool_ops macb_ethtool_ops = {
 	.set_ringparam		= macb_set_ringparam,
 };
 
-static int macb_get_eee(struct net_device *dev, struct ethtool_keee *eee)
+static int macb_get_eee(struct net_device *netdev, struct ethtool_keee *eee)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	return phylink_ethtool_get_eee(bp->phylink, eee);
 }
 
-static int macb_set_eee(struct net_device *dev, struct ethtool_keee *eee)
+static int macb_set_eee(struct net_device *netdev, struct ethtool_keee *eee)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	return phylink_ethtool_set_eee(bp->phylink, eee);
 }
@@ -4177,43 +4178,43 @@ static const struct ethtool_ops gem_ethtool_ops = {
 	.set_eee		= macb_set_eee,
 };
 
-static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int macb_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
-	if (!netif_running(dev))
+	if (!netif_running(netdev))
 		return -EINVAL;
 
 	return phylink_mii_ioctl(bp->phylink, rq, cmd);
 }
 
-static int macb_hwtstamp_get(struct net_device *dev,
+static int macb_hwtstamp_get(struct net_device *netdev,
 			     struct kernel_hwtstamp_config *cfg)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
-	if (!netif_running(dev))
+	if (!netif_running(netdev))
 		return -EINVAL;
 
 	if (!bp->ptp_info)
 		return -EOPNOTSUPP;
 
-	return bp->ptp_info->get_hwtst(dev, cfg);
+	return bp->ptp_info->get_hwtst(netdev, cfg);
 }
 
-static int macb_hwtstamp_set(struct net_device *dev,
+static int macb_hwtstamp_set(struct net_device *netdev,
 			     struct kernel_hwtstamp_config *cfg,
 			     struct netlink_ext_ack *extack)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
-	if (!netif_running(dev))
+	if (!netif_running(netdev))
 		return -EINVAL;
 
 	if (!bp->ptp_info)
 		return -EOPNOTSUPP;
 
-	return bp->ptp_info->set_hwtst(dev, cfg, extack);
+	return bp->ptp_info->set_hwtst(netdev, cfg, extack);
 }
 
 static inline void macb_set_txcsum_feature(struct macb *bp,
@@ -4236,7 +4237,7 @@ static inline void macb_set_txcsum_feature(struct macb *bp,
 static inline void macb_set_rxcsum_feature(struct macb *bp,
 					   netdev_features_t features)
 {
-	struct net_device *netdev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	u32 val;
 
 	if (!macb_is_gem(bp))
@@ -4283,7 +4284,7 @@ static int macb_set_features(struct net_device *netdev,
 
 static void macb_restore_features(struct macb *bp)
 {
-	struct net_device *netdev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	netdev_features_t features = netdev->features;
 	struct ethtool_rx_fs_item *item;
 
@@ -4300,14 +4301,14 @@ static void macb_restore_features(struct macb *bp)
 	macb_set_rxflow_feature(bp, features);
 }
 
-static int macb_taprio_setup_replace(struct net_device *ndev,
+static int macb_taprio_setup_replace(struct net_device *netdev,
 				     struct tc_taprio_qopt_offload *conf)
 {
 	u64 total_on_time = 0, start_time_sec = 0, start_time = conf->base_time;
 	u32 configured_queues = 0, speed = 0, start_time_nsec;
 	struct macb_queue_enst_config *enst_queue;
 	struct tc_taprio_sched_entry *entry;
-	struct macb *bp = netdev_priv(ndev);
+	struct macb *bp = netdev_priv(netdev);
 	struct ethtool_link_ksettings kset;
 	struct macb_queue *queue;
 	u32 queue_mask;
@@ -4316,13 +4317,13 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 	int err;
 
 	if (conf->num_entries > bp->num_queues) {
-		netdev_err(ndev, "Too many TAPRIO entries: %zu > %d queues\n",
+		netdev_err(netdev, "Too many TAPRIO entries: %zu > %d queues\n",
 			   conf->num_entries, bp->num_queues);
 		return -EINVAL;
 	}
 
 	if (conf->base_time < 0) {
-		netdev_err(ndev, "Invalid base_time: must be 0 or positive, got %lld\n",
+		netdev_err(netdev, "Invalid base_time: must be 0 or positive, got %lld\n",
 			   conf->base_time);
 		return -ERANGE;
 	}
@@ -4330,13 +4331,13 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 	/* Get the current link speed */
 	err = phylink_ethtool_ksettings_get(bp->phylink, &kset);
 	if (unlikely(err)) {
-		netdev_err(ndev, "Failed to get link settings: %d\n", err);
+		netdev_err(netdev, "Failed to get link settings: %d\n", err);
 		return err;
 	}
 
 	speed = kset.base.speed;
 	if (unlikely(speed <= 0)) {
-		netdev_err(ndev, "Invalid speed: %d\n", speed);
+		netdev_err(netdev, "Invalid speed: %d\n", speed);
 		return -EINVAL;
 	}
 
@@ -4349,7 +4350,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 		entry = &conf->entries[i];
 
 		if (entry->command != TC_TAPRIO_CMD_SET_GATES) {
-			netdev_err(ndev, "Entry %zu: unsupported command %d\n",
+			netdev_err(netdev, "Entry %zu: unsupported command %d\n",
 				   i, entry->command);
 			err = -EOPNOTSUPP;
 			goto cleanup;
@@ -4357,7 +4358,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 
 		/* Validate gate_mask: must be nonzero, single queue, and within range */
 		if (!is_power_of_2(entry->gate_mask)) {
-			netdev_err(ndev, "Entry %zu: gate_mask 0x%x is not a power of 2 (only one queue per entry allowed)\n",
+			netdev_err(netdev, "Entry %zu: gate_mask 0x%x is not a power of 2 (only one queue per entry allowed)\n",
 				   i, entry->gate_mask);
 			err = -EINVAL;
 			goto cleanup;
@@ -4366,7 +4367,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 		/* gate_mask must not select queues outside the valid queues */
 		queue_id = order_base_2(entry->gate_mask);
 		if (queue_id >= bp->num_queues) {
-			netdev_err(ndev, "Entry %zu: gate_mask 0x%x exceeds queue range (max_queues=%d)\n",
+			netdev_err(netdev, "Entry %zu: gate_mask 0x%x exceeds queue range (max_queues=%d)\n",
 				   i, entry->gate_mask, bp->num_queues);
 			err = -EINVAL;
 			goto cleanup;
@@ -4376,7 +4377,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 		start_time_sec = start_time;
 		start_time_nsec = do_div(start_time_sec, NSEC_PER_SEC);
 		if (start_time_sec > GENMASK(GEM_START_TIME_SEC_SIZE - 1, 0)) {
-			netdev_err(ndev, "Entry %zu: Start time %llu s exceeds hardware limit\n",
+			netdev_err(netdev, "Entry %zu: Start time %llu s exceeds hardware limit\n",
 				   i, start_time_sec);
 			err = -ERANGE;
 			goto cleanup;
@@ -4384,7 +4385,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 
 		/* Check for on time limit */
 		if (entry->interval > enst_max_hw_interval(speed)) {
-			netdev_err(ndev, "Entry %zu: interval %u ns exceeds hardware limit %llu ns\n",
+			netdev_err(netdev, "Entry %zu: interval %u ns exceeds hardware limit %llu ns\n",
 				   i, entry->interval, enst_max_hw_interval(speed));
 			err = -ERANGE;
 			goto cleanup;
@@ -4392,7 +4393,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 
 		/* Check for off time limit*/
 		if ((conf->cycle_time - entry->interval) > enst_max_hw_interval(speed)) {
-			netdev_err(ndev, "Entry %zu: off_time %llu ns exceeds hardware limit %llu ns\n",
+			netdev_err(netdev, "Entry %zu: off_time %llu ns exceeds hardware limit %llu ns\n",
 				   i, conf->cycle_time - entry->interval,
 				   enst_max_hw_interval(speed));
 			err = -ERANGE;
@@ -4415,13 +4416,13 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 
 	/* Check total interval doesn't exceed cycle time */
 	if (total_on_time > conf->cycle_time) {
-		netdev_err(ndev, "Total ON %llu ns exceeds cycle time %llu ns\n",
+		netdev_err(netdev, "Total ON %llu ns exceeds cycle time %llu ns\n",
 			   total_on_time, conf->cycle_time);
 		err = -EINVAL;
 		goto cleanup;
 	}
 
-	netdev_dbg(ndev, "TAPRIO setup: %zu entries, base_time=%lld ns, cycle_time=%llu ns\n",
+	netdev_dbg(netdev, "TAPRIO setup: %zu entries, base_time=%lld ns, cycle_time=%llu ns\n",
 		   conf->num_entries, conf->base_time, conf->cycle_time);
 
 	/* All validations passed - proceed with hardware configuration */
@@ -4446,7 +4447,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 		gem_writel(bp, ENST_CONTROL, configured_queues);
 	}
 
-	netdev_info(ndev, "TAPRIO configuration completed successfully: %zu entries, %d queues configured\n",
+	netdev_info(netdev, "TAPRIO configuration completed successfully: %zu entries, %d queues configured\n",
 		    conf->num_entries, hweight32(configured_queues));
 
 cleanup:
@@ -4454,14 +4455,14 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 	return err;
 }
 
-static void macb_taprio_destroy(struct net_device *ndev)
+static void macb_taprio_destroy(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(ndev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	u32 queue_mask;
 	unsigned int q;
 
-	netdev_reset_tc(ndev);
+	netdev_reset_tc(netdev);
 	queue_mask = BIT_U32(bp->num_queues) - 1;
 
 	scoped_guard(spinlock_irqsave, &bp->lock) {
@@ -4476,30 +4477,30 @@ static void macb_taprio_destroy(struct net_device *ndev)
 			queue_writel(queue, ENST_OFF_TIME, 0);
 		}
 	}
-	netdev_info(ndev, "TAPRIO destroy: All gates disabled\n");
+	netdev_info(netdev, "TAPRIO destroy: All gates disabled\n");
 }
 
-static int macb_setup_taprio(struct net_device *ndev,
+static int macb_setup_taprio(struct net_device *netdev,
 			     struct tc_taprio_qopt_offload *taprio)
 {
-	struct macb *bp = netdev_priv(ndev);
+	struct macb *bp = netdev_priv(netdev);
 	int err = 0;
 
-	if (unlikely(!(ndev->hw_features & NETIF_F_HW_TC)))
+	if (unlikely(!(netdev->hw_features & NETIF_F_HW_TC)))
 		return -EOPNOTSUPP;
 
 	/* Check if Device is in runtime suspend */
 	if (unlikely(pm_runtime_suspended(&bp->pdev->dev))) {
-		netdev_err(ndev, "Device is in runtime suspend\n");
+		netdev_err(netdev, "Device is in runtime suspend\n");
 		return -EOPNOTSUPP;
 	}
 
 	switch (taprio->cmd) {
 	case TAPRIO_CMD_REPLACE:
-		err = macb_taprio_setup_replace(ndev, taprio);
+		err = macb_taprio_setup_replace(netdev, taprio);
 		break;
 	case TAPRIO_CMD_DESTROY:
-		macb_taprio_destroy(ndev);
+		macb_taprio_destroy(netdev);
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -4508,15 +4509,15 @@ static int macb_setup_taprio(struct net_device *ndev,
 	return err;
 }
 
-static int macb_setup_tc(struct net_device *dev, enum tc_setup_type type,
+static int macb_setup_tc(struct net_device *netdev, enum tc_setup_type type,
 			 void *type_data)
 {
-	if (!dev || !type_data)
+	if (!netdev || !type_data)
 		return -EINVAL;
 
 	switch (type) {
 	case TC_SETUP_QDISC_TAPRIO:
-		return macb_setup_taprio(dev, type_data);
+		return macb_setup_taprio(netdev, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -4724,9 +4725,9 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk,
 
 static int macb_init_dflt(struct platform_device *pdev)
 {
-	struct net_device *dev = platform_get_drvdata(pdev);
+	struct net_device *netdev = platform_get_drvdata(pdev);
 	unsigned int hw_q, q;
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	int err;
 	u32 val, reg;
@@ -4742,8 +4743,8 @@ static int macb_init_dflt(struct platform_device *pdev)
 		queue = &bp->queues[q];
 		queue->bp = bp;
 		spin_lock_init(&queue->tx_ptr_lock);
-		netif_napi_add(dev, &queue->napi_rx, macb_rx_poll);
-		netif_napi_add_tx(dev, &queue->napi_tx, macb_tx_poll);
+		netif_napi_add(netdev, &queue->napi_rx, macb_rx_poll);
+		netif_napi_add_tx(netdev, &queue->napi_tx, macb_tx_poll);
 		if (hw_q) {
 			queue->ISR  = GEM_ISR(hw_q - 1);
 			queue->IER  = GEM_IER(hw_q - 1);
@@ -4773,7 +4774,7 @@ static int macb_init_dflt(struct platform_device *pdev)
 		 */
 		queue->irq = platform_get_irq(pdev, q);
 		err = devm_request_irq(&pdev->dev, queue->irq, macb_interrupt,
-				       IRQF_SHARED, dev->name, queue);
+				       IRQF_SHARED, netdev->name, queue);
 		if (err) {
 			dev_err(&pdev->dev,
 				"Unable to request IRQ %d (error %d)\n",
@@ -4785,7 +4786,7 @@ static int macb_init_dflt(struct platform_device *pdev)
 		q++;
 	}
 
-	dev->netdev_ops = &macb_netdev_ops;
+	netdev->netdev_ops = &macb_netdev_ops;
 
 	/* setup appropriated routines according to adapter type */
 	if (macb_is_gem(bp)) {
@@ -4793,39 +4794,39 @@ static int macb_init_dflt(struct platform_device *pdev)
 		bp->macbgem_ops.mog_free_rx_buffers = gem_free_rx_buffers;
 		bp->macbgem_ops.mog_init_rings = gem_init_rings;
 		bp->macbgem_ops.mog_rx = gem_rx;
-		dev->ethtool_ops = &gem_ethtool_ops;
+		netdev->ethtool_ops = &gem_ethtool_ops;
 	} else {
 		bp->macbgem_ops.mog_alloc_rx_buffers = macb_alloc_rx_buffers;
 		bp->macbgem_ops.mog_free_rx_buffers = macb_free_rx_buffers;
 		bp->macbgem_ops.mog_init_rings = macb_init_rings;
 		bp->macbgem_ops.mog_rx = macb_rx;
-		dev->ethtool_ops = &macb_ethtool_ops;
+		netdev->ethtool_ops = &macb_ethtool_ops;
 	}
 
-	netdev_sw_irq_coalesce_default_on(dev);
+	netdev_sw_irq_coalesce_default_on(netdev);
 
-	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
 	/* Set features */
-	dev->hw_features = NETIF_F_SG;
+	netdev->hw_features = NETIF_F_SG;
 
 	/* Check LSO capability; runtime detection can be overridden by a cap
 	 * flag if the hardware is known to be buggy
 	 */
 	if (!(bp->caps & MACB_CAPS_NO_LSO) &&
 	    GEM_BFEXT(PBUF_LSO, gem_readl(bp, DCFG6)))
-		dev->hw_features |= MACB_NETIF_LSO;
+		netdev->hw_features |= MACB_NETIF_LSO;
 
 	/* Checksum offload is only available on gem with packet buffer */
 	if (macb_is_gem(bp) && !(bp->caps & MACB_CAPS_FIFO_MODE))
-		dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
+		netdev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
 	if (bp->caps & MACB_CAPS_SG_DISABLED)
-		dev->hw_features &= ~NETIF_F_SG;
+		netdev->hw_features &= ~NETIF_F_SG;
 	/* Enable HW_TC if hardware supports QBV */
 	if (bp->caps & MACB_CAPS_QBV)
-		dev->hw_features |= NETIF_F_HW_TC;
+		netdev->hw_features |= NETIF_F_HW_TC;
 
-	dev->features = dev->hw_features;
+	netdev->features = netdev->hw_features;
 
 	/* Check RX Flow Filters support.
 	 * Max Rx flows set by availability of screeners & compare regs:
@@ -4843,7 +4844,7 @@ static int macb_init_dflt(struct platform_device *pdev)
 			reg = GEM_BFINS(ETHTCMP, (uint16_t)ETH_P_IP, reg);
 			gem_writel_n(bp, ETHT, SCRT2_ETHT, reg);
 			/* Filtering is supported in hw but don't enable it in kernel now */
-			dev->hw_features |= NETIF_F_NTUPLE;
+			netdev->hw_features |= NETIF_F_NTUPLE;
 			/* init Rx flow definitions */
 			bp->rx_fs_list.count = 0;
 			spin_lock_init(&bp->rx_fs_lock);
@@ -5053,9 +5054,9 @@ static void at91ether_stop(struct macb *lp)
 }
 
 /* Open the ethernet interface */
-static int at91ether_open(struct net_device *dev)
+static int at91ether_open(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(dev);
+	struct macb *lp = netdev_priv(netdev);
 	u32 ctl;
 	int ret;
 
@@ -5077,7 +5078,7 @@ static int at91ether_open(struct net_device *dev)
 	if (ret)
 		goto stop;
 
-	netif_start_queue(dev);
+	netif_start_queue(netdev);
 
 	return 0;
 
@@ -5089,11 +5090,11 @@ static int at91ether_open(struct net_device *dev)
 }
 
 /* Close the interface */
-static int at91ether_close(struct net_device *dev)
+static int at91ether_close(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(dev);
+	struct macb *lp = netdev_priv(netdev);
 
-	netif_stop_queue(dev);
+	netif_stop_queue(netdev);
 
 	phylink_stop(lp->phylink);
 	phylink_disconnect_phy(lp->phylink);
@@ -5107,14 +5108,14 @@ static int at91ether_close(struct net_device *dev)
 
 /* Transmit packet */
 static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
-					struct net_device *dev)
+					struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(dev);
+	struct macb *lp = netdev_priv(netdev);
 
 	if (macb_readl(lp, TSR) & MACB_BIT(RM9200_BNQ)) {
 		int desc = 0;
 
-		netif_stop_queue(dev);
+		netif_stop_queue(netdev);
 
 		/* Store packet information (to free when Tx completed) */
 		lp->rm9200_txq[desc].skb = skb;
@@ -5123,8 +5124,8 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 							      skb->len, DMA_TO_DEVICE);
 		if (dma_mapping_error(&lp->pdev->dev, lp->rm9200_txq[desc].mapping)) {
 			dev_kfree_skb_any(skb);
-			dev->stats.tx_dropped++;
-			netdev_err(dev, "%s: DMA mapping error\n", __func__);
+			netdev->stats.tx_dropped++;
+			netdev_err(netdev, "%s: DMA mapping error\n", __func__);
 			return NETDEV_TX_OK;
 		}
 
@@ -5134,7 +5135,8 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 		macb_writel(lp, TCR, skb->len);
 
 	} else {
-		netdev_err(dev, "%s called, but device is busy!\n", __func__);
+		netdev_err(netdev, "%s called, but device is busy!\n",
+			   __func__);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -5144,9 +5146,9 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 /* Extract received frame from buffer descriptors and sent to upper layers.
  * (Called from interrupt context)
  */
-static void at91ether_rx(struct net_device *dev)
+static void at91ether_rx(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(dev);
+	struct macb *lp = netdev_priv(netdev);
 	struct macb_queue *q = &lp->queues[0];
 	struct macb_dma_desc *desc;
 	unsigned char *p_recv;
@@ -5157,21 +5159,21 @@ static void at91ether_rx(struct net_device *dev)
 	while (desc->addr & MACB_BIT(RX_USED)) {
 		p_recv = q->rx_buffers + q->rx_tail * AT91ETHER_MAX_RBUFF_SZ;
 		pktlen = MACB_BF(RX_FRMLEN, desc->ctrl);
-		skb = netdev_alloc_skb(dev, pktlen + 2);
+		skb = netdev_alloc_skb(netdev, pktlen + 2);
 		if (skb) {
 			skb_reserve(skb, 2);
 			skb_put_data(skb, p_recv, pktlen);
 
-			skb->protocol = eth_type_trans(skb, dev);
-			dev->stats.rx_packets++;
-			dev->stats.rx_bytes += pktlen;
+			skb->protocol = eth_type_trans(skb, netdev);
+			netdev->stats.rx_packets++;
+			netdev->stats.rx_bytes += pktlen;
 			netif_rx(skb);
 		} else {
-			dev->stats.rx_dropped++;
+			netdev->stats.rx_dropped++;
 		}
 
 		if (desc->ctrl & MACB_BIT(RX_MHASH_MATCH))
-			dev->stats.multicast++;
+			netdev->stats.multicast++;
 
 		/* reset ownership bit */
 		desc->addr &= ~MACB_BIT(RX_USED);
@@ -5189,8 +5191,8 @@ static void at91ether_rx(struct net_device *dev)
 /* MAC interrupt handler */
 static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 {
-	struct net_device *dev = dev_id;
-	struct macb *lp = netdev_priv(dev);
+	struct net_device *netdev = dev_id;
+	struct macb *lp = netdev_priv(netdev);
 	u32 intstatus, ctl;
 	unsigned int desc;
 
@@ -5201,13 +5203,13 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 
 	/* Receive complete */
 	if (intstatus & MACB_BIT(RCOMP))
-		at91ether_rx(dev);
+		at91ether_rx(netdev);
 
 	/* Transmit complete */
 	if (intstatus & MACB_BIT(TCOMP)) {
 		/* The TCOM bit is set even if the transmission failed */
 		if (intstatus & (MACB_BIT(ISR_TUND) | MACB_BIT(ISR_RLE)))
-			dev->stats.tx_errors++;
+			netdev->stats.tx_errors++;
 
 		desc = 0;
 		if (lp->rm9200_txq[desc].skb) {
@@ -5215,10 +5217,10 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 			lp->rm9200_txq[desc].skb = NULL;
 			dma_unmap_single(&lp->pdev->dev, lp->rm9200_txq[desc].mapping,
 					 lp->rm9200_txq[desc].size, DMA_TO_DEVICE);
-			dev->stats.tx_packets++;
-			dev->stats.tx_bytes += lp->rm9200_txq[desc].size;
+			netdev->stats.tx_packets++;
+			netdev->stats.tx_bytes += lp->rm9200_txq[desc].size;
 		}
-		netif_wake_queue(dev);
+		netif_wake_queue(netdev);
 	}
 
 	/* Work-around for EMAC Errata section 41.3.1 */
@@ -5230,18 +5232,18 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 	}
 
 	if (intstatus & MACB_BIT(ISR_ROVR))
-		netdev_err(dev, "ROVR error\n");
+		netdev_err(netdev, "ROVR error\n");
 
 	return IRQ_HANDLED;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-static void at91ether_poll_controller(struct net_device *dev)
+static void at91ether_poll_controller(struct net_device *netdev)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	at91ether_interrupt(dev->irq, dev);
+	at91ether_interrupt(netdev->irq, netdev);
 	local_irq_restore(flags);
 }
 #endif
@@ -5288,17 +5290,17 @@ static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk,
 
 static int at91ether_init(struct platform_device *pdev)
 {
-	struct net_device *dev = platform_get_drvdata(pdev);
-	struct macb *bp = netdev_priv(dev);
+	struct net_device *netdev = platform_get_drvdata(pdev);
+	struct macb *bp = netdev_priv(netdev);
 	int err;
 
 	bp->queues[0].bp = bp;
 
-	dev->netdev_ops = &at91ether_netdev_ops;
-	dev->ethtool_ops = &macb_ethtool_ops;
+	netdev->netdev_ops = &at91ether_netdev_ops;
+	netdev->ethtool_ops = &macb_ethtool_ops;
 
-	err = devm_request_irq(&pdev->dev, dev->irq, at91ether_interrupt,
-			       0, dev->name, dev);
+	err = devm_request_irq(&pdev->dev, netdev->irq, at91ether_interrupt,
+			       0, netdev->name, netdev);
 	if (err)
 		return err;
 
@@ -5427,8 +5429,8 @@ static int fu540_c000_init(struct platform_device *pdev)
 
 static int init_reset_optional(struct platform_device *pdev)
 {
-	struct net_device *dev = platform_get_drvdata(pdev);
-	struct macb *bp = netdev_priv(dev);
+	struct net_device *netdev = platform_get_drvdata(pdev);
+	struct macb *bp = netdev_priv(netdev);
 	int ret;
 
 	if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) {
@@ -5736,7 +5738,7 @@ static int macb_probe(struct platform_device *pdev)
 	const struct macb_config *macb_config;
 	struct clk *tsu_clk = NULL;
 	phy_interface_t interface;
-	struct net_device *dev;
+	struct net_device *netdev;
 	struct resource *regs;
 	u32 wtrmrk_rst_val;
 	void __iomem *mem;
@@ -5771,19 +5773,19 @@ static int macb_probe(struct platform_device *pdev)
 		goto err_disable_clocks;
 	}
 
-	dev = alloc_etherdev_mq(sizeof(*bp), num_queues);
-	if (!dev) {
+	netdev = alloc_etherdev_mq(sizeof(*bp), num_queues);
+	if (!netdev) {
 		err = -ENOMEM;
 		goto err_disable_clocks;
 	}
 
-	dev->base_addr = regs->start;
+	netdev->base_addr = regs->start;
 
-	SET_NETDEV_DEV(dev, &pdev->dev);
+	SET_NETDEV_DEV(netdev, &pdev->dev);
 
-	bp = netdev_priv(dev);
+	bp = netdev_priv(netdev);
 	bp->pdev = pdev;
-	bp->dev = dev;
+	bp->netdev = netdev;
 	bp->regs = mem;
 	bp->native_io = native_io;
 	if (native_io) {
@@ -5856,21 +5858,21 @@ static int macb_probe(struct platform_device *pdev)
 		bp->caps |= MACB_CAPS_DMA_64B;
 	}
 #endif
-	platform_set_drvdata(pdev, dev);
+	platform_set_drvdata(pdev, netdev);
 
-	dev->irq = platform_get_irq(pdev, 0);
-	if (dev->irq < 0) {
-		err = dev->irq;
+	netdev->irq = platform_get_irq(pdev, 0);
+	if (netdev->irq < 0) {
+		err = netdev->irq;
 		goto err_out_free_netdev;
 	}
 
 	/* MTU range: 68 - 1518 or 10240 */
-	dev->min_mtu = GEM_MTU_MIN_SIZE;
+	netdev->min_mtu = GEM_MTU_MIN_SIZE;
 	if ((bp->caps & MACB_CAPS_JUMBO) && bp->jumbo_max_len)
-		dev->max_mtu = MIN(bp->jumbo_max_len, RX_BUFFER_MAX) -
+		netdev->max_mtu = MIN(bp->jumbo_max_len, RX_BUFFER_MAX) -
 				ETH_HLEN - ETH_FCS_LEN;
 	else
-		dev->max_mtu = 1536 - ETH_HLEN - ETH_FCS_LEN;
+		netdev->max_mtu = 1536 - ETH_HLEN - ETH_FCS_LEN;
 
 	if (bp->caps & MACB_CAPS_BD_RD_PREFETCH) {
 		val = GEM_BFEXT(RXBD_RDBUFF, gem_readl(bp, DCFG10));
@@ -5888,7 +5890,7 @@ static int macb_probe(struct platform_device *pdev)
 	if (bp->caps & MACB_CAPS_NEEDS_RSTONUBR)
 		bp->rx_intr_mask |= MACB_BIT(RXUBR);
 
-	err = of_get_ethdev_address(np, bp->dev);
+	err = of_get_ethdev_address(np, bp->netdev);
 	if (err == -EPROBE_DEFER)
 		goto err_out_free_netdev;
 	else if (err)
@@ -5910,9 +5912,9 @@ static int macb_probe(struct platform_device *pdev)
 	if (err)
 		goto err_out_phy_exit;
 
-	netif_carrier_off(dev);
+	netif_carrier_off(netdev);
 
-	err = register_netdev(dev);
+	err = register_netdev(netdev);
 	if (err) {
 		dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
 		goto err_out_unregister_mdio;
@@ -5921,9 +5923,9 @@ static int macb_probe(struct platform_device *pdev)
 	INIT_WORK(&bp->hresp_err_bh_work, macb_hresp_error_task);
 	INIT_DELAYED_WORK(&bp->tx_lpi_work, macb_tx_lpi_work_fn);
 
-	netdev_info(dev, "Cadence %s rev 0x%08x at 0x%08lx irq %d (%pM)\n",
+	netdev_info(netdev, "Cadence %s rev 0x%08x at 0x%08lx irq %d (%pM)\n",
 		    macb_is_gem(bp) ? "GEM" : "MACB", macb_readl(bp, MID),
-		    dev->base_addr, dev->irq, dev->dev_addr);
+		    netdev->base_addr, netdev->irq, netdev->dev_addr);
 
 	pm_runtime_put_autosuspend(&bp->pdev->dev);
 
@@ -5937,7 +5939,7 @@ static int macb_probe(struct platform_device *pdev)
 	phy_exit(bp->phy);
 
 err_out_free_netdev:
-	free_netdev(dev);
+	free_netdev(netdev);
 
 err_disable_clocks:
 	macb_clks_disable(pclk, hclk, tx_clk, rx_clk, tsu_clk);
@@ -5950,14 +5952,14 @@ static int macb_probe(struct platform_device *pdev)
 
 static void macb_remove(struct platform_device *pdev)
 {
-	struct net_device *dev;
+	struct net_device *netdev;
 	struct macb *bp;
 
-	dev = platform_get_drvdata(pdev);
+	netdev = platform_get_drvdata(pdev);
 
-	if (dev) {
-		bp = netdev_priv(dev);
-		unregister_netdev(dev);
+	if (netdev) {
+		bp = netdev_priv(netdev);
+		unregister_netdev(netdev);
 		phy_exit(bp->phy);
 		mdiobus_unregister(bp->mii_bus);
 		mdiobus_free(bp->mii_bus);
@@ -5969,7 +5971,7 @@ static void macb_remove(struct platform_device *pdev)
 		pm_runtime_dont_use_autosuspend(&pdev->dev);
 		pm_runtime_set_suspended(&pdev->dev);
 		phylink_destroy(bp->phylink);
-		free_netdev(dev);
+		free_netdev(netdev);
 	}
 }
 
@@ -5984,7 +5986,7 @@ static int __maybe_unused macb_suspend(struct device *dev)
 	u32 tmp, ifa_local;
 	unsigned int q;
 
-	if (!device_may_wakeup(&bp->dev->dev))
+	if (!device_may_wakeup(&bp->netdev->dev))
 		phy_exit(bp->phy);
 
 	if (!netif_running(netdev))
@@ -5994,7 +5996,7 @@ static int __maybe_unused macb_suspend(struct device *dev)
 		if (bp->wolopts & WAKE_ARP) {
 			/* Check for IP address in WOL ARP mode */
 			rcu_read_lock();
-			idev = __in_dev_get_rcu(bp->dev);
+			idev = __in_dev_get_rcu(bp->netdev);
 			if (idev)
 				ifa = rcu_dereference(idev->ifa_list);
 			if (!ifa) {
@@ -6096,7 +6098,7 @@ static int __maybe_unused macb_resume(struct device *dev)
 	unsigned long flags;
 	unsigned int q;
 
-	if (!device_may_wakeup(&bp->dev->dev))
+	if (!device_may_wakeup(&bp->netdev->dev))
 		phy_init(bp->phy);
 
 	if (!netif_running(netdev))
diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c
index b79dec17e6b0..ac009007118f 100644
--- a/drivers/net/ethernet/cadence/macb_pci.c
+++ b/drivers/net/ethernet/cadence/macb_pci.c
@@ -24,48 +24,48 @@
 #define GEM_PCLK_RATE 50000000
 #define GEM_HCLK_RATE 50000000
 
-static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static int macb_probe(struct pci_dev *pci, const struct pci_device_id *id)
 {
 	int err;
-	struct platform_device *plat_dev;
+	struct platform_device *pdev;
 	struct platform_device_info plat_info;
 	struct macb_platform_data plat_data;
 	struct resource res[2];
 
 	/* enable pci device */
-	err = pcim_enable_device(pdev);
+	err = pcim_enable_device(pci);
 	if (err < 0) {
-		dev_err(&pdev->dev, "Enabling PCI device has failed: %d", err);
+		dev_err(&pci->dev, "Enabling PCI device has failed: %d", err);
 		return err;
 	}
 
-	pci_set_master(pdev);
+	pci_set_master(pci);
 
 	/* set up resources */
 	memset(res, 0x00, sizeof(struct resource) * ARRAY_SIZE(res));
-	res[0].start = pci_resource_start(pdev, 0);
-	res[0].end = pci_resource_end(pdev, 0);
+	res[0].start = pci_resource_start(pci, 0);
+	res[0].end = pci_resource_end(pci, 0);
 	res[0].name = PCI_DRIVER_NAME;
 	res[0].flags = IORESOURCE_MEM;
-	res[1].start = pci_irq_vector(pdev, 0);
+	res[1].start = pci_irq_vector(pci, 0);
 	res[1].name = PCI_DRIVER_NAME;
 	res[1].flags = IORESOURCE_IRQ;
 
-	dev_info(&pdev->dev, "EMAC physical base addr: %pa\n",
+	dev_info(&pci->dev, "EMAC physical base addr: %pa\n",
 		 &res[0].start);
 
 	/* set up macb platform data */
 	memset(&plat_data, 0, sizeof(plat_data));
 
 	/* initialize clocks */
-	plat_data.pclk = clk_register_fixed_rate(&pdev->dev, "pclk", NULL, 0,
+	plat_data.pclk = clk_register_fixed_rate(&pci->dev, "pclk", NULL, 0,
 						 GEM_PCLK_RATE);
 	if (IS_ERR(plat_data.pclk)) {
 		err = PTR_ERR(plat_data.pclk);
 		goto err_pclk_register;
 	}
 
-	plat_data.hclk = clk_register_fixed_rate(&pdev->dev, "hclk", NULL, 0,
+	plat_data.hclk = clk_register_fixed_rate(&pci->dev, "hclk", NULL, 0,
 						 GEM_HCLK_RATE);
 	if (IS_ERR(plat_data.hclk)) {
 		err = PTR_ERR(plat_data.hclk);
@@ -74,24 +74,24 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	/* set up platform device info */
 	memset(&plat_info, 0, sizeof(plat_info));
-	plat_info.parent = &pdev->dev;
-	plat_info.fwnode = pdev->dev.fwnode;
+	plat_info.parent = &pci->dev;
+	plat_info.fwnode = pci->dev.fwnode;
 	plat_info.name = PLAT_DRIVER_NAME;
-	plat_info.id = pdev->devfn;
+	plat_info.id = pci->devfn;
 	plat_info.res = res;
 	plat_info.num_res = ARRAY_SIZE(res);
 	plat_info.data = &plat_data;
 	plat_info.size_data = sizeof(plat_data);
-	plat_info.dma_mask = pdev->dma_mask;
+	plat_info.dma_mask = pci->dma_mask;
 
 	/* register platform device */
-	plat_dev = platform_device_register_full(&plat_info);
-	if (IS_ERR(plat_dev)) {
-		err = PTR_ERR(plat_dev);
+	pdev = platform_device_register_full(&plat_info);
+	if (IS_ERR(pdev)) {
+		err = PTR_ERR(pdev);
 		goto err_plat_dev_register;
 	}
 
-	pci_set_drvdata(pdev, plat_dev);
+	pci_set_drvdata(pci, pdev);
 
 	return 0;
 
@@ -105,14 +105,14 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	return err;
 }
 
-static void macb_remove(struct pci_dev *pdev)
+static void macb_remove(struct pci_dev *pci)
 {
-	struct platform_device *plat_dev = pci_get_drvdata(pdev);
-	struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev);
+	struct platform_device *pdev = pci_get_drvdata(pci);
+	struct macb_platform_data *plat_data = dev_get_platdata(&pdev->dev);
 	struct clk *pclk = plat_data->pclk;
 	struct clk *hclk = plat_data->hclk;
 
-	platform_device_unregister(plat_dev);
+	platform_device_unregister(pdev);
 	clk_unregister_fixed_rate(pclk);
 	clk_unregister_fixed_rate(hclk);
 }
diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c
index d91f7b1aa39c..e5195d7dac1d 100644
--- a/drivers/net/ethernet/cadence/macb_ptp.c
+++ b/drivers/net/ethernet/cadence/macb_ptp.c
@@ -324,9 +324,9 @@ void gem_ptp_txstamp(struct macb *bp, struct sk_buff *skb,
 	skb_tstamp_tx(skb, &shhwtstamps);
 }
 
-void gem_ptp_init(struct net_device *dev)
+void gem_ptp_init(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	bp->ptp_clock_info = gem_ptp_caps_template;
 
@@ -334,7 +334,7 @@ void gem_ptp_init(struct net_device *dev)
 	bp->tsu_rate = bp->ptp_info->get_tsu_rate(bp);
 	bp->ptp_clock_info.max_adj = bp->ptp_info->get_ptp_max_adj();
 	gem_ptp_init_timer(bp);
-	bp->ptp_clock = ptp_clock_register(&bp->ptp_clock_info, &dev->dev);
+	bp->ptp_clock = ptp_clock_register(&bp->ptp_clock_info, &netdev->dev);
 	if (IS_ERR(bp->ptp_clock)) {
 		pr_err("ptp clock register failed: %ld\n",
 			PTR_ERR(bp->ptp_clock));
@@ -353,9 +353,9 @@ void gem_ptp_init(struct net_device *dev)
 		 GEM_PTP_TIMER_NAME);
 }
 
-void gem_ptp_remove(struct net_device *ndev)
+void gem_ptp_remove(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(ndev);
+	struct macb *bp = netdev_priv(netdev);
 
 	if (bp->ptp_clock) {
 		ptp_clock_unregister(bp->ptp_clock);
@@ -378,10 +378,10 @@ static int gem_ptp_set_ts_mode(struct macb *bp,
 	return 0;
 }
 
-int gem_get_hwtst(struct net_device *dev,
+int gem_get_hwtst(struct net_device *netdev,
 		  struct kernel_hwtstamp_config *tstamp_config)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	*tstamp_config = bp->tstamp_config;
 	if (!macb_dma_ptp(bp))
@@ -402,13 +402,13 @@ static void gem_ptp_set_one_step_sync(struct macb *bp, u8 enable)
 		macb_writel(bp, NCR, reg_val & ~MACB_BIT(OSSMODE));
 }
 
-int gem_set_hwtst(struct net_device *dev,
+int gem_set_hwtst(struct net_device *netdev,
 		  struct kernel_hwtstamp_config *tstamp_config,
 		  struct netlink_ext_ack *extack)
 {
 	enum macb_bd_control tx_bd_control = TSTAMP_DISABLED;
 	enum macb_bd_control rx_bd_control = TSTAMP_DISABLED;
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	u32 regval;
 
 	if (!macb_dma_ptp(bp))

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 00/14] net: macb: implement context swapping
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun

MACB has a pretty primitive approach to buffer management. They are all
stored in `struct macb *bp`. On operations that require buffer realloc
(set_ringparam & change_mtu ATM), the only option is to close the
interface, change our global state and re-open the interface.

Two issues:
- It doesn't fly on memory pressured systems; we free our precious
  buffers and don't manage to reallocate fully, meaning our machine
  just lost its network access.
- Anecdotally, it is pretty slow because it implies a full PHY reinit.

Instead, we shall:
 - allocate a new context (including buffers) first
 - if it fails, early return without any impact to the interface
 - stop interface
 - update global state (bp, netdev, etc)
 - pass newly allocated buffer pointers to the hardware
 - start interface
 - free old context

This is what we implement here. Both .set_ringparam() and
.ndo_change_mtu() are covered by this series. In the future,
at least .set_channels() [0], XDP [1] and XSK [2] would benefit.

The change is super intrusive so conflicts will be major. Sorry!

Thanks,
Have a nice day,
Théo

[0]: https://lore.kernel.org/netdev/20260317-macb-set-channels-v4-0-1bd4f4ffcfca@bootlin.com/
[1]: https://lore.kernel.org/netdev/20260323221047.2749577-1-pvalerio@redhat.com/
[2]: https://lore.kernel.org/netdev/20260304-macb-xsk-v1-0-ba2ebe2bdaa3@bootlin.com/

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
Changes in v2:
- Patch "add subset of `struct macb` to `struct macb_context`" was
  messed up. It contained much more than what the name implied. Split
  into three commits (I caused trouble by rebase reordering).
- Fix tieoff; V1 allocated it without initialisation.
- Fix NULL pointer dereference on context in macb_get_regs() and
  macb_get_ringparam() when interface is offline.
- Patch "unify device pointer naming convention":
  - Fix build issue when CONFIG_NETCONSOLE=y.
  - Rename `struct net_device *dev` to `netdev` in macb.h.
  - Rename `struct phy_device *phy` to `phydev` in macb_main.c.
- On swap, call netdev_tx_reset_queue() to reset all DQL counters.
- At end of swap, add missing kfree(old_ctx).
- During HW disabling in swap, grab bp->lock to protect against IRQ
  handler.
- On swap, cancel the three BH features MACB has:
  bp->hresp_err_bh_work, bp->tx_lpi_work and queue->tx_error_task.
- On swap, call macb_configure_dma() which writes buffer size to
  hardware registers. This is important because the change_mtu codepath
  changes the buffer size.
- Rebase onto latest net-next/main (58dd34dbd5b0) & resolve conflicts.
- Link to v1: https://patch.msgid.link/20260401-macb-context-v1-0-9590c5ab7272@bootlin.com

---
Théo Lebrun (14):
      net: macb: unify device pointer naming convention
      net: macb: unify `struct macb *` naming convention
      net: macb: unify queue index variable naming convention and types
      net: macb: enforce reverse christmas tree (RCT) convention
      net: macb: allocate tieoff descriptor once across device lifetime
      net: macb: introduce macb_context struct for buffer management
      net: macb: avoid macb_init_rx_buffer_size() modifying state
      net: macb: make `struct macb` subset reachable from macb_context struct
      net: macb: change caps helpers signatures
      net: macb: change function signatures to take contexts
      net: macb: introduce macb_context_alloc() helper
      net: macb: re-read ISR inside IRQ handler locked section
      net: macb: use context swapping in .set_ringparam()
      net: macb: use context swapping in .ndo_change_mtu()

 drivers/net/ethernet/cadence/macb.h      |  125 ++-
 drivers/net/ethernet/cadence/macb_main.c | 1767 +++++++++++++++++-------------
 drivers/net/ethernet/cadence/macb_pci.c  |   46 +-
 drivers/net/ethernet/cadence/macb_ptp.c  |   26 +-
 4 files changed, 1126 insertions(+), 838 deletions(-)
---
base-commit: 6b6916526425235d5875df21dfa6f31fdc098599
change-id: 20260401-macb-context-bd0caf20414d

Best regards,
--  
Théo Lebrun <theo.lebrun@bootlin.com>


^ permalink raw reply

* Re: [PATCH net-next 2/3] netdevsim: psp: handle the new crypt-offset and spi-threshold get/set operations
From: Akhilesh Samineni @ 2026-04-10 19:48 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: davem, edumazet, kuba, pabeni, andrew+netdev, horms, willemb,
	daniel.zahka, netdev, linux-kernel, jayakrishnan.udayavarma,
	ajit.khaparde, kiran.kella, sachin.suman
In-Reply-To: <willemdebruijn.kernel.327df0cb46f23@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 2096 bytes --]

On Wed, Apr 8, 2026 at 3:19 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> Akhilesh Samineni wrote:
> > Implement the crypt-offset and spi-threshold get/set in netdevsim PSP.
> >
> > Signed-off-by: Akhilesh Samineni <akhilesh.samineni@broadcom.com>
> > Reviewed-by: Kiran Kella <kiran.kella@broadcom.com>
> > Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
> > ---
> >  drivers/net/netdevsim/netdevsim.h | 2 ++
> >  drivers/net/netdevsim/psp.c       | 6 ++++++
> >  2 files changed, 8 insertions(+)
> >
> > diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
> > index c904e14f6b3f..3ad7d42391c0 100644
> > --- a/drivers/net/netdevsim/netdevsim.h
> > +++ b/drivers/net/netdevsim/netdevsim.h
> > @@ -117,6 +117,8 @@ struct netdevsim {
> >               struct psp_dev *dev;
> >               u32 spi;
> >               u32 assoc_cnt;
> > +             u8  crypt_offset;
> > +             u32 spi_threshold;
> >       } psp;
> >
> >       struct nsim_bus_dev *nsim_bus_dev;
> > diff --git a/drivers/net/netdevsim/psp.c b/drivers/net/netdevsim/psp.c
> > index 0b4d717253b0..9098edf00c5c 100644
> > --- a/drivers/net/netdevsim/psp.c
> > +++ b/drivers/net/netdevsim/psp.c
> > @@ -122,6 +122,11 @@ static int
> >  nsim_psp_set_config(struct psp_dev *psd, struct psp_dev_config *conf,
> >                   struct netlink_ext_ack *extack)
> >  {
> > +     struct netdevsim *ns = psd->drv_priv;
> > +
> > +     ns->psp.crypt_offset = conf->crypt_offset;
> > +     ns->psp.spi_threshold = conf->spi_threshold;
> > +
> >       return 0;
> >  }
> >
> > @@ -249,6 +254,7 @@ int nsim_psp_init(struct netdevsim *ns)
> >       if (err)
> >               return err;
> >
> > +     ns->psp.spi_threshold = PSP_SPI_THRESHOLD_DEFAULT;
> >       debugfs_create_file("psp_rereg", 0200, ddir, ns, &nsim_psp_rereg_fops);
> >       return 0;
>
> Default initialization should probably all complete before the device
> is made visible with psp_dev_create.

Yes. I will update it in the next v2 patch.

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4211 bytes --]

^ permalink raw reply

* Re: [PATCH net-next 2/3] netdevsim: psp: handle the new crypt-offset and spi-threshold get/set operations
From: Akhilesh Samineni @ 2026-04-10 19:45 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: davem, edumazet, kuba, pabeni, andrew+netdev, horms, willemb,
	daniel.zahka, netdev, linux-kernel, jayakrishnan.udayavarma,
	ajit.khaparde, kiran.kella, sachin.suman
In-Reply-To: <willemdebruijn.kernel.2484afecaca4d@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 2059 bytes --]

On Wed, Apr 8, 2026 at 3:13 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> Akhilesh Samineni wrote:
> > Implement the crypt-offset and spi-threshold get/set in netdevsim PSP.
> >
> > Signed-off-by: Akhilesh Samineni <akhilesh.samineni@broadcom.com>
> > Reviewed-by: Kiran Kella <kiran.kella@broadcom.com>
> > Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
> > ---
> >  drivers/net/netdevsim/netdevsim.h | 2 ++
> >  drivers/net/netdevsim/psp.c       | 6 ++++++
> >  2 files changed, 8 insertions(+)
> >
> > diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
> > index c904e14f6b3f..3ad7d42391c0 100644
> > --- a/drivers/net/netdevsim/netdevsim.h
> > +++ b/drivers/net/netdevsim/netdevsim.h
> > @@ -117,6 +117,8 @@ struct netdevsim {
> >               struct psp_dev *dev;
> >               u32 spi;
> >               u32 assoc_cnt;
> > +             u8  crypt_offset;
>
> Minor: variable names are already not aligned. No need for two spaces.
>

Ack
> > +             u32 spi_threshold;
> >       } psp;
> >
> >       struct nsim_bus_dev *nsim_bus_dev;
> > diff --git a/drivers/net/netdevsim/psp.c b/drivers/net/netdevsim/psp.c
> > index 0b4d717253b0..9098edf00c5c 100644
> > --- a/drivers/net/netdevsim/psp.c
> > +++ b/drivers/net/netdevsim/psp.c
> > @@ -122,6 +122,11 @@ static int
> >  nsim_psp_set_config(struct psp_dev *psd, struct psp_dev_config *conf,
> >                   struct netlink_ext_ack *extack)
> >  {
> > +     struct netdevsim *ns = psd->drv_priv;
> > +
> > +     ns->psp.crypt_offset = conf->crypt_offset;
> > +     ns->psp.spi_threshold = conf->spi_threshold;
> > +
> >       return 0;
> >  }
> >
> > @@ -249,6 +254,7 @@ int nsim_psp_init(struct netdevsim *ns)
> >       if (err)
> >               return err;
> >
> > +     ns->psp.spi_threshold = PSP_SPI_THRESHOLD_DEFAULT;
> >       debugfs_create_file("psp_rereg", 0200, ddir, ns, &nsim_psp_rereg_fops);
> >       return 0;
> >  }
> > --
> > 2.45.4
> >
>
>

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4211 bytes --]

^ permalink raw reply

* Re: [PATCH net-next 1/3] psp: add crypt-offset and spi-threshold get/set attributes
From: Akhilesh Samineni @ 2026-04-10 19:36 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Willem de Bruijn, davem, edumazet, pabeni, andrew+netdev, horms,
	willemb, daniel.zahka, netdev, linux-kernel,
	jayakrishnan.udayavarma, ajit.khaparde, kiran.kella, sachin.suman
In-Reply-To: <20260407180432.102073cf@kernel.org>

[-- Attachment #1: Type: text/plain, Size: 596 bytes --]

On Wed, Apr 8, 2026 at 6:34 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Tue, 07 Apr 2026 17:37:41 -0400 Willem de Bruijn wrote:
> > > +   if (info->attrs[PSP_A_DEV_CRYPT_OFFSET])
> > > +           new_config.crypt_offset =
> > > +                   nla_get_u8(info->attrs[PSP_A_DEV_CRYPT_OFFSET]);
> >
> > PSP defines a 6-bit field in 4 octet units. Does this need bounds checking?
>
> More fundamentally, were we to support this -- is it a device property
> or an assoc property?

It's a device property. All associations under the device will share
the same crypt-offset.

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4211 bytes --]

^ permalink raw reply

* Re: [PATCH net-next 1/3] psp: add crypt-offset and spi-threshold get/set attributes
From: Akhilesh Samineni @ 2026-04-10 19:34 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: davem, edumazet, kuba, pabeni, andrew+netdev, horms, willemb,
	daniel.zahka, netdev, linux-kernel, jayakrishnan.udayavarma,
	ajit.khaparde, kiran.kella, sachin.suman
In-Reply-To: <willemdebruijn.kernel.1d7f9f774aa55@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 10028 bytes --]

On Wed, Apr 8, 2026 at 3:07 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> Akhilesh Samineni wrote:
> > crypt-offset (Crypt Offset)
> > ----------------------------------
> > The crypt-offset attribute specifies the byte offset within a packet
> > from which encryption begins. This is a per-device attribute that
> > allows a portion of the packet header to remain in plaintext while
> > the rest of the payload is encrypted. This is useful in scenarios
> > where intermediate nodes need to inspect or process a fixed-size
> > header before the encrypted payload.
> >
> > The default value is 0, meaning encryption starts from the beginning
> > of the payload following the PSP header.
> >
> > spi-threshold (SPI Threshold)
> > ------------------------------
> > The SPI (Security Parameter Index) is a 32-bit per-device identifier
> > used to distinguish security associations. As SPI values are allocated
> > monotonically, a threshold is needed to trigger timely SPI rotation
> > before the space is exhausted.
> >
> > The spi-threshold attribute allows userspace to configure the value at
> > which an SPI rotation should be initiated. The default is set to
> > PSP_SPI_THRESHOLD_DEFAULT (~90% of 0x7FFFFFFF), providing a comfortable
> > margin to perform rotation without racing to exhaustion.
> >
> > NOTE: A follow-up series will add notification support to alert
> > subscribed users when the configured spi-threshold is reached, enabling
> > timely SPI rotation.
> >
> > Signed-off-by: Akhilesh Samineni <akhilesh.samineni@broadcom.com>
> > Reviewed-by: Kiran Kella <kiran.kella@broadcom.com>
> > Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
> > ---
> >  Documentation/netlink/specs/psp.yaml | 13 +++++++++++++
> >  include/net/psp/types.h              |  7 +++++++
> >  include/uapi/linux/psp.h             |  2 ++
> >  net/psp/psp-nl-gen.c                 |  6 ++++--
> >  net/psp/psp_main.c                   |  3 +++
> >  net/psp/psp_nl.c                     | 27 +++++++++++++++++++++++----
> >  6 files changed, 52 insertions(+), 6 deletions(-)
> >
> > diff --git a/Documentation/netlink/specs/psp.yaml b/Documentation/netlink/specs/psp.yaml
> > index f3a57782d2cf..b22869be91cf 100644
> > --- a/Documentation/netlink/specs/psp.yaml
> > +++ b/Documentation/netlink/specs/psp.yaml
> > @@ -38,6 +38,15 @@ attribute-sets:
> >          type: u32
> >          enum: version
> >          enum-as-flags: true
> > +      -
> > +        name: crypt-offset
> > +        doc: The offset from the end of the PSP header to the start of the encrypted payload.
>
> In 4 octet units?
>

Yes. crypt-offset is in 4 octet units only. I will update the
description accordingly in the next v2 patch.

> > +        type: u8
> > +      -
> > +        name: spi-threshold
> > +        doc: Threshold for the SPI to trigger notification to the user for appropriate rotate action.
> > +        type: u32
> > +
> >    -
> >      name: assoc
> >      attributes:
> > @@ -170,6 +179,8 @@ operations:
> >              - ifindex
> >              - psp-versions-cap
> >              - psp-versions-ena
> > +            - crypt-offset
> > +            - spi-threshold
> >          pre: psp-device-get-locked
> >          post: psp-device-unlock
> >        dump:
> > @@ -193,6 +204,8 @@ operations:
> >            attributes:
> >              - id
> >              - psp-versions-ena
> > +            - crypt-offset
> > +            - spi-threshold
> >          reply:
> >            attributes: []
> >          pre: psp-device-get-locked
> > diff --git a/include/net/psp/types.h b/include/net/psp/types.h
> > index 25a9096d4e7d..875f7822557f 100644
> > --- a/include/net/psp/types.h
> > +++ b/include/net/psp/types.h
> > @@ -25,6 +25,9 @@ struct psphdr {
> >  #define PSP_SPI_KEY_ID               GENMASK(30, 0)
> >  #define PSP_SPI_KEY_PHASE    BIT(31)
> >
> > +/* Default SPI threshold: ~90% of max SPI (0x7FFFFFFF) to allow rotation before exhaustion */
> > +#define PSP_SPI_THRESHOLD_DEFAULT    0x73333333
>
> Do you want to choose a more round number, in either hex or dec?
>

I think we can use 0x70000000; it's approximately 87.5% of the maximum SPI.

> > +
> >  #define PSPHDR_CRYPT_OFFSET  GENMASK(5, 0)
> >
> >  #define PSPHDR_VERFL_SAMPLE  BIT(7)
> > @@ -38,9 +41,13 @@ struct psphdr {
> >  /**
> >   * struct psp_dev_config - PSP device configuration
> >   * @versions: PSP versions enabled on the device
> > + * @crypt_offset: crypto offset configured on the device
> > + * @spi_threshold: SPI threshold value on the device
> >   */
> >  struct psp_dev_config {
> >       u32 versions;
> > +     u8 crypt_offset;
> > +     u32 spi_threshold;
> >  };
> >
> >  /**
> > diff --git a/include/uapi/linux/psp.h b/include/uapi/linux/psp.h
> > index a3a336488dc3..bb390159dc72 100644
> > --- a/include/uapi/linux/psp.h
> > +++ b/include/uapi/linux/psp.h
> > @@ -22,6 +22,8 @@ enum {
> >       PSP_A_DEV_IFINDEX,
> >       PSP_A_DEV_PSP_VERSIONS_CAP,
> >       PSP_A_DEV_PSP_VERSIONS_ENA,
> > +     PSP_A_DEV_CRYPT_OFFSET,
> > +     PSP_A_DEV_SPI_THRESHOLD,
> >
> >       __PSP_A_DEV_MAX,
> >       PSP_A_DEV_MAX = (__PSP_A_DEV_MAX - 1)
> > diff --git a/net/psp/psp-nl-gen.c b/net/psp/psp-nl-gen.c
> > index 22a48d0fa378..e50b8b80955c 100644
> > --- a/net/psp/psp-nl-gen.c
> > +++ b/net/psp/psp-nl-gen.c
> > @@ -23,9 +23,11 @@ static const struct nla_policy psp_dev_get_nl_policy[PSP_A_DEV_ID + 1] = {
> >  };
> >
> >  /* PSP_CMD_DEV_SET - do */
> > -static const struct nla_policy psp_dev_set_nl_policy[PSP_A_DEV_PSP_VERSIONS_ENA + 1] = {
> > +static const struct nla_policy psp_dev_set_nl_policy[PSP_A_DEV_SPI_THRESHOLD + 1] = {
> >       [PSP_A_DEV_ID] = NLA_POLICY_MIN(NLA_U32, 1),
> >       [PSP_A_DEV_PSP_VERSIONS_ENA] = NLA_POLICY_MASK(NLA_U32, 0xf),
> > +     [PSP_A_DEV_CRYPT_OFFSET] = { .type = NLA_U8, },
> > +     [PSP_A_DEV_SPI_THRESHOLD] = { .type = NLA_U32, },
> >  };
> >
> >  /* PSP_CMD_KEY_ROTATE - do */
> > @@ -75,7 +77,7 @@ static const struct genl_split_ops psp_nl_ops[] = {
> >               .doit           = psp_nl_dev_set_doit,
> >               .post_doit      = psp_device_unlock,
> >               .policy         = psp_dev_set_nl_policy,
> > -             .maxattr        = PSP_A_DEV_PSP_VERSIONS_ENA,
> > +             .maxattr        = PSP_A_DEV_SPI_THRESHOLD,
> >               .flags          = GENL_CMD_CAP_DO,
> >       },
> >       {
> > diff --git a/net/psp/psp_main.c b/net/psp/psp_main.c
> > index 9508b6c38003..536ee44db09d 100644
> > --- a/net/psp/psp_main.c
> > +++ b/net/psp/psp_main.c
> > @@ -79,6 +79,9 @@ psp_dev_create(struct net_device *netdev,
> >       INIT_LIST_HEAD(&psd->stale_assocs);
> >       refcount_set(&psd->refcnt, 1);
> >
> > +     /* ~90% of 0x7FFFFFFF; allows SPI rotation well before space is exhausted */
>
> Repeat comment. Not needed here.
>

Ack

> > +     psd->config.spi_threshold = PSP_SPI_THRESHOLD_DEFAULT;
> > +
> >       mutex_lock(&psp_devs_lock);
> >       err = xa_alloc_cyclic(&psp_devs, &psd->id, psd, xa_limit_16b,
> >                             &last_id, GFP_KERNEL);
> > diff --git a/net/psp/psp_nl.c b/net/psp/psp_nl.c
> > index 6afd7707ec12..fbb77460a24b 100644
> > --- a/net/psp/psp_nl.c
> > +++ b/net/psp/psp_nl.c
> > @@ -101,7 +101,9 @@ psp_nl_dev_fill(struct psp_dev *psd, struct sk_buff *rsp,
> >       if (nla_put_u32(rsp, PSP_A_DEV_ID, psd->id) ||
> >           nla_put_u32(rsp, PSP_A_DEV_IFINDEX, psd->main_netdev->ifindex) ||
> >           nla_put_u32(rsp, PSP_A_DEV_PSP_VERSIONS_CAP, psd->caps->versions) ||
> > -         nla_put_u32(rsp, PSP_A_DEV_PSP_VERSIONS_ENA, psd->config.versions))
> > +         nla_put_u32(rsp, PSP_A_DEV_PSP_VERSIONS_ENA, psd->config.versions) ||
> > +         nla_put_u8(rsp, PSP_A_DEV_CRYPT_OFFSET, psd->config.crypt_offset) ||
> > +         nla_put_u32(rsp, PSP_A_DEV_SPI_THRESHOLD, psd->config.spi_threshold))
> >               goto err_cancel_msg;
> >
> >       genlmsg_end(rsp, hdr);
> > @@ -193,6 +195,13 @@ int psp_nl_dev_set_doit(struct sk_buff *skb, struct genl_info *info)
> >
> >       memcpy(&new_config, &psd->config, sizeof(new_config));
> >
> > +     if (!info->attrs[PSP_A_DEV_PSP_VERSIONS_ENA] &&
> > +         !info->attrs[PSP_A_DEV_CRYPT_OFFSET] &&
> > +         !info->attrs[PSP_A_DEV_SPI_THRESHOLD]) {
> > +             NL_SET_ERR_MSG(info->extack, "No settings present");
> > +             return -EINVAL;
> > +     }
> > +
> >       if (info->attrs[PSP_A_DEV_PSP_VERSIONS_ENA]) {
> >               new_config.versions =
> >                       nla_get_u32(info->attrs[PSP_A_DEV_PSP_VERSIONS_ENA]);
> > @@ -200,9 +209,19 @@ int psp_nl_dev_set_doit(struct sk_buff *skb, struct genl_info *info)
> >                       NL_SET_ERR_MSG(info->extack, "Requested PSP versions not supported by the device");
> >                       return -EINVAL;
> >               }
> > -     } else {
> > -             NL_SET_ERR_MSG(info->extack, "No settings present");
> > -             return -EINVAL;
> > +     }
> > +
> > +     if (info->attrs[PSP_A_DEV_CRYPT_OFFSET])
> > +             new_config.crypt_offset =
> > +                     nla_get_u8(info->attrs[PSP_A_DEV_CRYPT_OFFSET]);
>
> PSP defines a 6-bit field in 4 octet units. Does this need bounds checking?
>

Yes, I will add the bounds checks in the next v2 patch.
> > +
> > +     if (info->attrs[PSP_A_DEV_SPI_THRESHOLD]) {
> > +             new_config.spi_threshold =
> > +                     nla_get_u32(info->attrs[PSP_A_DEV_SPI_THRESHOLD]);
> > +             if (new_config.spi_threshold & PSP_SPI_KEY_PHASE) {
> > +                     NL_SET_ERR_MSG(info->extack, "SPI threshold must not have bit 31 set");
> > +                     return -EINVAL;
> > +             }
> >       }
> >
> >       rsp = psp_nl_reply_new(info);
> > --
> > 2.45.4
> >
>
>

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4211 bytes --]

^ permalink raw reply

* [PATCH net 2/2] sctp: discard stale INIT after handshake completion
From: Xin Long @ 2026-04-10 18:59 UTC (permalink / raw)
  To: network dev, linux-sctp
  Cc: davem, kuba, Eric Dumazet, Paolo Abeni, Simon Horman,
	Marcelo Ricardo Leitner, Florian Westphal, Yi Chen
In-Reply-To: <cover.1775847557.git.lucien.xin@gmail.com>

After an association reaches ESTABLISHED, the peer’s init_tag is already
known from the handshake. Any subsequent INIT with the same init_tag is
not a valid restart, but a delayed or duplicate INIT.

Drop such INIT chunks in sctp_sf_do_unexpected_init() instead of
processing them as new association attempts.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 net/sctp/sm_statefuns.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 7b823d759141..3bec026ecbc0 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1556,6 +1556,12 @@ static enum sctp_disposition sctp_sf_do_unexpected_init(
 	/* Tag the variable length parameters.  */
 	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
 
+	if (asoc->state >= SCTP_STATE_ESTABLISHED) {
+		/* Discard INIT matching peer vtag after handshake completion (stale INIT). */
+		if (chunk->subh.init_hdr->init_tag == asoc->peer.i.init_tag)
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+	}
+
 	/* Verify the INIT chunk before processing it. */
 	err_chunk = NULL;
 	if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
-- 
2.47.1


^ permalink raw reply related

* [PATCH net 1/2] netfilter: skip recording stale or retransmitted INIT
From: Xin Long @ 2026-04-10 18:59 UTC (permalink / raw)
  To: network dev, linux-sctp
  Cc: davem, kuba, Eric Dumazet, Paolo Abeni, Simon Horman,
	Marcelo Ricardo Leitner, Florian Westphal, Yi Chen
In-Reply-To: <cover.1775847557.git.lucien.xin@gmail.com>

An INIT whose init_tag matches the peer's vtag does not provide new state
information. It indicates either:

- a stale INIT (after INIT-ACK has already been seen on the same side), or
- a retransmitted INIT (after INIT has already been recorded on the same
  side).

In both cases, the INIT must not update ct->proto.sctp.init[] state, since
it does not advance the handshake tracking and may otherwise corrupt
INIT/INIT-ACK validation logic.

Allow INIT processing only when the conntrack entry is newly created
(SCTP_CONNTRACK_NONE), or when the init_tag differs from the stored peer
vtag.

Note that the check is skipped for a ct whose old_state is
SCTP_CONNTRACK_NONE in nf_conntrack_sctp_packet(), as it has just been
created in sctp_new(), which sets
ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag.

Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 645d2c43ebf7..7e10fa65cbdd 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -466,9 +466,13 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
 			if (!ih)
 				goto out_unlock;
 
-			if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir])
-				ct->proto.sctp.init[!dir] = 0;
-			ct->proto.sctp.init[dir] = 1;
+			/* Do not record INIT matching peer vtag (stale or retransmitted INIT). */
+			if (old_state == SCTP_CONNTRACK_NONE ||
+			    ct->proto.sctp.vtag[!dir] != ih->init_tag) {
+				if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir])
+					ct->proto.sctp.init[!dir] = 0;
+				ct->proto.sctp.init[dir] = 1;
+			}
 
 			pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir);
 			ct->proto.sctp.vtag[!dir] = ih->init_tag;
-- 
2.47.1


^ permalink raw reply related

* [PATCH net 0/2] sctp: fix a vtag verification failure caused by stale INITs
From: Xin Long @ 2026-04-10 18:59 UTC (permalink / raw)
  To: network dev, linux-sctp
  Cc: davem, kuba, Eric Dumazet, Paolo Abeni, Simon Horman,
	Marcelo Ricardo Leitner, Florian Westphal, Yi Chen

Similar to Scenario B in commit 8e56b063c865 ("netfilter: handle the
connecting collision properly in nf_conntrack_proto_sctp"):

Scenario B: INIT_ACK is delayed until the peer completes its own handshake

  192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408]
    192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885]
    192.168.1.2 > 192.168.1.1: sctp (1) [INIT ACK] [init tag: 3922216408]
    192.168.1.1 > 192.168.1.2: sctp (1) [COOKIE ECHO]
    192.168.1.2 > 192.168.1.1: sctp (1) [COOKIE ACK]
  192.168.1.1 > 192.168.1.2: sctp (1) [INIT ACK] [init tag: 3914796021] *

There is another case:

Scenario F: INIT is delayed until the peer completes its own handshake

  192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408]
  (OVS upcall)
    192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885]
    192.168.1.2 > 192.168.1.1: sctp (1) [INIT ACK] [init tag: 3922216408]
    192.168.1.1 > 192.168.1.2: sctp (1) [COOKIE ECHO]
    192.168.1.2 > 192.168.1.1: sctp (1) [COOKIE ACK]
  192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408]
  (delayed)
  192.168.1.1 > 192.168.1.2: sctp (1) [INIT ACK] [init tag: 3914796021] *

In this case, the delayed INIT (e.g. due to OVS upcall) is recorded by
conntrack, which prevents vtag verification from dropping the unexpected
INIT-ACK in nf_conntrack_sctp_packet():

  vtag = ct->proto.sctp.vtag[!dir];
  if (!ct->proto.sctp.init[!dir] && vtag && vtag != ih->init_tag)
          goto out_unlock;

This happens because ct->proto.sctp.init[!dir] is set by the delayed INIT,
even though it is stale.

Fix this in two parts:

- In netfilter (1st patch): Do not record INITs whose init_tag matches the
  peer vtag, as they carry no new handshake state.

- In SCTP (2nd patch): Prevent endpoints from responding to such INITs with
  INIT-ACK, ensuring correctness even when middleboxes lack the netfilter
  fix.

A follow-up selftest for this scenario will be posted in a separate patch
by Yi Chen.

Xin Long (2):
  netfilter: skip recording stale or retransmitted INIT
  sctp: discard stale INIT after handshake completion

 net/netfilter/nf_conntrack_proto_sctp.c | 10 +++++++---
 net/sctp/sm_statefuns.c                 |  6 ++++++
 2 files changed, 13 insertions(+), 3 deletions(-)

-- 
2.47.1


^ permalink raw reply

* Re: [PATCH iwl-net 10/10] ice: allow setting min_tx_rate to 0 to resolve VF bandwidth oversubscription
From: Tony Nguyen @ 2026-04-10 18:58 UTC (permalink / raw)
  To: Aleksandr Loktionov, intel-wired-lan; +Cc: netdev
In-Reply-To: <20260403054029.3789616-11-aleksandr.loktionov@intel.com>



On 4/2/2026 10:40 PM, Aleksandr Loktionov wrote:
> ice_set_vf_bw() refuses to accept any min_tx_rate value when the
> total guaranteed bandwidth is already oversubscribed, even when the
> requested value is 0. This makes it impossible to recover from an
> oversubscribed state via "ip link set <pf> vf <id> min_tx_rate 0".
> 
> Allow a zero min_tx_rate to bypass the oversubscription check so
> users can always clear the guaranteed rate. Additionally print an
> informational message when the oversubscription guard fires to help
> diagnose why a non-zero request was rejected.
> 
> Fixes: 4ecc8633056b ("ice: Add support for VF rate limiting")
> Cc: stable@vger.kernel.org
> Signed-off-by: Sudheer Mogilappagari <sudheer.mogilappagari@intel.com>
> Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
> ---
>   drivers/net/ethernet/intel/ice/ice_sriov.c | 8 +++++++-
>   1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
> index 7e00e09..6e3bec7 100644
> --- a/drivers/net/ethernet/intel/ice/ice_sriov.c
> +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
> @@ -1507,6 +1507,12 @@ ice_min_tx_rate_oversubscribed(struct ice_vf *vf, int min_tx_rate)
>   	all_vfs_min_tx_rate -= vf->min_tx_rate;
>   
>   	if (all_vfs_min_tx_rate + min_tx_rate > link_speed_mbps) {
> +		if (ice_calc_all_vfs_min_tx_rate(vf->pf) > link_speed_mbps) {

ice_calc_all_vfs_min_tx_rate() is already called above (out of this 
patch context), can we save that to an interim var and save this second 
call?

> +			dev_info(ice_pf_to_dev(vf->pf),
> +				 "The sum of min_tx_rate for all VFs is greater than the link speed\n");
> +			dev_info(ice_pf_to_dev(vf->pf),
> +				 "Set min_tx_rate to 0 on VFs to resolve oversubscription\n");

Why not 1 string/call?

Thanks,
Tony

> +		}
>   		dev_err(ice_pf_to_dev(vf->pf), "min_tx_rate of %d Mbps on VF %u would cause oversubscription of %d Mbps based on the current link speed %d Mbps\n",
>   			min_tx_rate, vf->vf_id,
>   			all_vfs_min_tx_rate + min_tx_rate - link_speed_mbps,
> @@ -1556,7 +1562,7 @@ ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
>   		goto out_put_vf;
>   	}
>   
> -	if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) {
> +	if (min_tx_rate && ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) {
>   		ret = -EINVAL;
>   		goto out_put_vf;
>   	}


^ permalink raw reply

* Re: [PATCH iwl-net 6/10] ice: check PHY autoneg capability before rejecting ethtool autoneg setting
From: Tony Nguyen @ 2026-04-10 18:58 UTC (permalink / raw)
  To: Aleksandr Loktionov, intel-wired-lan; +Cc: netdev, Jan Glaza
In-Reply-To: <20260403054029.3789616-7-aleksandr.loktionov@intel.com>



On 4/2/2026 10:40 PM, Aleksandr Loktionov wrote:
> ice_set_link_ksettings() rejects autoneg requests by comparing
> user settings against safe_ks which is populated by
> ice_phy_type_to_ethtool(). The Autoneg bit in safe_ks is set
> only if the current PHY configuration reports it supported,
> but this misses PHYs that support autoneg and have it available
> through PHY capabilities. Pull the autoneg flag from the actual
> PHY capabilities (already fetched earlier in the function) to
> ensure the user can toggle autoneg on any capable PHY.
> 
> Fixes: 5cd349c349d6 ("ice: report supported and advertised autoneg using PHY capabilities")
> Cc: stable@vger.kernel.org
> Signed-off-by: Jan Glaza <jan.glaza@intel.com>
> Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
> ---
>   drivers/net/ethernet/intel/ice/ice_ethtool.c | 8 ++++++++
>   1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> index 49b9376..44483bc 100644
> --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
> +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> @@ -2654,6 +2654,14 @@ ice_set_link_ksettings(struct net_device *netdev,
>   	/* Get link modes supported by hardware.*/
>   	ice_phy_type_to_ethtool(netdev, &safe_ks);
>   
> +	/* Pull the value of autoneg from phy caps to ensure we allow
> +	 * toggling it on all PHYs that support it.
> +	 */
> +	if (ice_is_phy_caps_an_enabled(phy_caps)) {
> +		ethtool_link_ksettings_add_link_mode(&safe_ks, supported, Autoneg);
> +		set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, safe_ks.link_modes.supported);

 From Sashiko:

This isn't a bug, but should this use 
ethtool_link_ksettings_add_link_mode() instead of calling set_bit() 
directly? Using set_bit() on the link modes breaks the ethtool interface 
abstraction.

Also, does this incorrectly couple the ETHTOOL_LINK_MODE_FEC_NONE_BIT 
support with Autonegotiation support? Forward Error Correction support 
is independent of Autonegotiation.

For PHYs lacking Autonegotiation, the FEC none bit will not be added to 
safe_ks.link_modes.supported. When a user requests settings via ethtool, 
copy_ks.link_modes.advertising will likely contain the FEC none bit 
since it is unconditionally returned by ice_get_link_ksettings().

> +	}
> +
>   	/* and check against modes requested by user.
>   	 * Return an error if unsupported mode was set.
>   	 */


^ permalink raw reply

* Re: [ovs-dev] [PATCH net-next v2] net: openvswitch: decouple flow_table from ovs_mutex
From: Aaron Conole @ 2026-04-10 18:52 UTC (permalink / raw)
  To: Adrian Moreno via dev
  Cc: netdev, Adrian Moreno, open list:OPENVSWITCH, Paolo Abeni,
	open list, Ilya Maximets, Eric Dumazet, Simon Horman,
	Jakub Kicinski, David S. Miller
In-Reply-To: <20260407120418.356718-1-amorenoz@redhat.com>

Hi Adrian,

Thanks for the patch.  A few questions inline.

Adrian Moreno via dev <ovs-dev@openvswitch.org> writes:

> Currently the entire ovs module is write-protected using the global
> ovs_mutex. While this simple approach works fine for control-plane
> operations (such as vport configurations), requiring the global mutex
> for flow modifications can be problematic.
>
> During periods of high control-plane operations, e.g: netdevs (vports)
> coming and going, RTNL can suffer contention. This contention is easily
> transferred to the ovs_mutex as RTNL nests inside ovs_mutex. Flow
> modifications, however, are done as part of packet processing and having
> them wait for RTNL pressure to go away can lead to packet drops.
>
> This patch decouples flow_table modifications from ovs_mutex by means of
> the following:
>
> 1 - Make flow_table an rcu-protected pointer inside the datapath.
> This allows both objects to be protected independently while reducing the
> amount of changes required in "flow_table.c".
>
> 2 - Create a new mutex inside the flow_table that protects it from
> concurrent modifications.
> Putting the mutex inside flow_table makes it easier to consume for
> functions inside flow_table.c that do not currently take pointers to the
> datapath.
> Some function signatures need to be changed to accept flow_table so that
> lockdep checks can be performed.
>
> 3 - Create a reference count to temporarily extend rcu protection from
> the datapath to the flow_table.
> In order to use the flow_table without locking ovs_mutex, the flow_table
> pointer must be first dereferenced within an rcu-protected region.
> Next, the table->mutex needs to be locked to protect it from
> concurrent writes but mutexes must not be locked inside an rcu-protected
> region, so the rcu-protected region must be left at which point the
> datapath can be concurrently freed.
> To extend the protection beyond the rcu region, a reference count is used.
> One reference is held by the datapath, the other is temporarily
> increased during flow modifications. For example:
>
> Datapath deletion:
>
>   ovs_lock();
>   table = rcu_dereference_protected(dp->table, ...);
>   rcu_assign_pointer(dp->table, NULL);
>   ovs_flow_tbl_put(table);
>   ovs_unlock();

I guess it's possible now to have flow operations succeed on
'removed-but-not-yet-freed' tables.  That's probably worth documenting
somewhere, since it is a slight behavior change.  More below

> Flow modification:
>
>   rcu_read_lock();
>   dp = get_dp(...);
>   table = rcu_dereference(dp->table);
>   ovs_flow_tbl_get(table);
>   rcu_read_unlock();
>
>   mutex_lock(&table->lock);
>   /* Perform modifications on the flow_table */
>   mutex_unlock(&table->lock);
>   ovs_flow_tbl_put(table);
>
> Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
> ---
> v2: Fix argument in ovs_flow_tbl_put (sparse)
>     Remove rcu checks in ovs_dp_masks_rebalance
> ---
>  net/openvswitch/datapath.c   | 285 ++++++++++++++++++++++++-----------
>  net/openvswitch/datapath.h   |   2 +-
>  net/openvswitch/flow.c       |  13 +-
>  net/openvswitch/flow.h       |   9 +-
>  net/openvswitch/flow_table.c | 180 ++++++++++++++--------
>  net/openvswitch/flow_table.h |  51 ++++++-
>  6 files changed, 380 insertions(+), 160 deletions(-)
>
> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> index e209099218b4..9c234993520c 100644
> --- a/net/openvswitch/datapath.c
> +++ b/net/openvswitch/datapath.c
> @@ -88,13 +88,17 @@ static void ovs_notify(struct genl_family *family,
>   * DOC: Locking:
>   *
>   * All writes e.g. Writes to device state (add/remove datapath, port, set
> - * operations on vports, etc.), Writes to other state (flow table
> - * modifications, set miscellaneous datapath parameters, etc.) are protected
> - * by ovs_lock.
> + * operations on vports, etc.) and writes to other datapath parameters
> + * are protected by ovs_lock.
> + *
> + * Writes to the flow table are NOT protected by ovs_lock. Instead, a per-table
> + * mutex and reference count are used (see comment above "struct flow_table"
> + * definition). On some few occasions, the per-flow table mutex is nested
> + * inside ovs_mutex.
>   *
>   * Reads are protected by RCU.
>   *
> - * There are a few special cases (mostly stats) that have their own
> + * There are a few other special cases (mostly stats) that have their own
>   * synchronization but they nest under all of above and don't interact with
>   * each other.
>   *
> @@ -166,7 +170,6 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
>  {
>  	struct datapath *dp = container_of(rcu, struct datapath, rcu);
>  
> -	ovs_flow_tbl_destroy(&dp->table);
>  	free_percpu(dp->stats_percpu);
>  	kfree(dp->ports);
>  	ovs_meters_exit(dp);
> @@ -247,6 +250,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
>  	struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
>  	const struct vport *p = OVS_CB(skb)->input_vport;
>  	struct datapath *dp = p->dp;
> +	struct flow_table *table;
>  	struct sw_flow *flow;
>  	struct sw_flow_actions *sf_acts;
>  	struct dp_stats_percpu *stats;
> @@ -257,9 +261,16 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
>  	int error;
>  
>  	stats = this_cpu_ptr(dp->stats_percpu);
> +	table = rcu_dereference(dp->table);
> +	if (!table) {
> +		net_dbg_ratelimited("ovs: no flow table on datapath %s\n",
> +				    ovs_dp_name(dp));
> +		kfree_skb(skb);
> +		return;
> +	}
>  
>  	/* Look up flow. */
> -	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
> +	flow = ovs_flow_tbl_lookup_stats(table, key, skb_get_hash(skb),
>  					 &n_mask_hit, &n_cache_hit);
>  	if (unlikely(!flow)) {
>  		struct dp_upcall_info upcall;
> @@ -752,12 +763,16 @@ static struct genl_family dp_packet_genl_family __ro_after_init = {
>  static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
>  			 struct ovs_dp_megaflow_stats *mega_stats)
>  {
> +	struct flow_table *table = ovsl_dereference(dp->table);
>  	int i;
>  
>  	memset(mega_stats, 0, sizeof(*mega_stats));
>  
> -	stats->n_flows = ovs_flow_tbl_count(&dp->table);
> -	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
> +	if (table) {
> +		stats->n_flows = ovs_flow_tbl_count(table);

Previously, when calling this we'd be under the ovs_mutex and the read
on table->count would be somewhat coherent (for some definition of
coherent).  BUT we are now doing a bare read.  I'm not sure if we should
take the lock here, or at least give some kind of barrier (READ_ONCE and
update the count setting sites with WRITE_ONCEs)?  WDYT?

> +		mega_stats->n_masks = ovs_flow_tbl_num_masks(table);
> +	}
> +
>  
>  	stats->n_hit = stats->n_missed = stats->n_lost = 0;
>  
> @@ -829,15 +844,16 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
>  		+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
>  }
>  
> -/* Called with ovs_mutex or RCU read lock. */
> +/* Called with table->lock or RCU read lock. */
>  static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
> +				   const struct flow_table *table,
>  				   struct sk_buff *skb)
>  {
>  	struct ovs_flow_stats stats;
>  	__be16 tcp_flags;
>  	unsigned long used;
>  
> -	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
> +	ovs_flow_stats_get(flow, table, &stats, &used, &tcp_flags);
>  
>  	if (used &&
>  	    nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
> @@ -857,8 +873,9 @@ static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
>  	return 0;
>  }
>  
> -/* Called with ovs_mutex or RCU read lock. */
> +/* Called with RCU read lock or table->lock held. */
>  static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
> +				     const struct flow_table *table,
>  				     struct sk_buff *skb, int skb_orig_len)
>  {
>  	struct nlattr *start;
> @@ -878,7 +895,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
>  	if (start) {
>  		const struct sw_flow_actions *sf_acts;
>  
> -		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
> +		sf_acts = rcu_dereference_ovs_tbl(flow->sf_acts, table);
>  		err = ovs_nla_put_actions(sf_acts->actions,
>  					  sf_acts->actions_len, skb);
>  
> @@ -897,8 +914,10 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
>  	return 0;
>  }
>  
> -/* Called with ovs_mutex or RCU read lock. */
> -static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
> +/* Called with table->lock or RCU read lock. */
> +static int ovs_flow_cmd_fill_info(const struct sw_flow *flow,
> +				  const struct flow_table *table,
> +				  int dp_ifindex,
>  				  struct sk_buff *skb, u32 portid,
>  				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
>  {
> @@ -929,12 +948,12 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
>  			goto error;
>  	}
>  
> -	err = ovs_flow_cmd_fill_stats(flow, skb);
> +	err = ovs_flow_cmd_fill_stats(flow, table, skb);
>  	if (err)
>  		goto error;
>  
>  	if (should_fill_actions(ufid_flags)) {
> -		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
> +		err = ovs_flow_cmd_fill_actions(flow, table, skb, skb_orig_len);
>  		if (err)
>  			goto error;
>  	}
> @@ -968,8 +987,9 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
>  	return skb;
>  }
>  
> -/* Called with ovs_mutex. */
> +/* Called with table->lock. */
>  static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
> +					       const struct flow_table *table,
>  					       int dp_ifindex,
>  					       struct genl_info *info, u8 cmd,
>  					       bool always, u32 ufid_flags)
> @@ -977,12 +997,12 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
>  	struct sk_buff *skb;
>  	int retval;
>  
> -	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
> +	skb = ovs_flow_cmd_alloc_info(ovs_tbl_dereference(flow->sf_acts, table),
>  				      &flow->id, info, always, ufid_flags);
>  	if (IS_ERR_OR_NULL(skb))
>  		return skb;
>  
> -	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
> +	retval = ovs_flow_cmd_fill_info(flow, table, dp_ifindex, skb,
>  					info->snd_portid, info->snd_seq, 0,
>  					cmd, ufid_flags);
>  	if (WARN_ON_ONCE(retval < 0)) {
> @@ -998,6 +1018,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  	struct nlattr **a = info->attrs;
>  	struct ovs_header *ovs_header = genl_info_userhdr(info);
>  	struct sw_flow *flow = NULL, *new_flow;
> +	struct flow_table *table;
>  	struct sw_flow_mask mask;
>  	struct sk_buff *reply;
>  	struct datapath *dp;
> @@ -1064,30 +1085,43 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  		goto err_kfree_acts;
>  	}
>  

I think this can lead to a weird(?) behavior:

thread A (dp_destroy):                   thread b (ovs_flow_cmd_new):
rcu_assign_pointer(dp->table, NULL)
                                         rcu_read_lock();
                                         table =
                                         rcu_dereference(dp->table);
                                           [old table]
                                         ovs_flow_tbl_get(table)
                                             //refcnt change
                                         rcu_read_unlock()
ovs_flow_tbl_put(table) // refcnt chg
                                         mutex_lock(table->lock)
                                         ovs_flow_table_insert(...)
                                         [success reply]
                                         mutex_unlock(table->lock)
                                         ovs_flow_tbl_put(table)
                                         // table flow flush, etc.

I guess it isn't a huge deal (installing a flow while deleting the table would
be weird from a userspace perspective), and I think it is safe, but it
is worth mentioning that we can have such a scenario now.

> -	ovs_lock();
> +	rcu_read_lock();
>  	dp = get_dp(net, ovs_header->dp_ifindex);
>  	if (unlikely(!dp)) {
>  		error = -ENODEV;
> -		goto err_unlock_ovs;
> +		rcu_read_unlock();
> +		goto err_kfree_reply;
>  	}
> +	table = rcu_dereference(dp->table);
> +	if (!table || !ovs_flow_tbl_get(table)) {
> +		error = -ENODEV;
> +		rcu_read_unlock();
> +		goto err_kfree_reply;
> +	}
> +	rcu_read_unlock();
> +
> +	/* It is safe to dereference "table" after leaving rcu read-protected
> +	 * region because it's pinned by refcount.
> +	 */
> +	mutex_lock(&table->lock);
>  
>  	/* Check if this is a duplicate flow */
>  	if (ovs_identifier_is_ufid(&new_flow->id))
> -		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
> +		flow = ovs_flow_tbl_lookup_ufid(table, &new_flow->id);
>  	if (!flow)
> -		flow = ovs_flow_tbl_lookup(&dp->table, key);
> +		flow = ovs_flow_tbl_lookup(table, key);
>  	if (likely(!flow)) {
>  		rcu_assign_pointer(new_flow->sf_acts, acts);
>  
>  		/* Put flow in bucket. */
> -		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
> +		error = ovs_flow_tbl_insert(table, new_flow, &mask);
>  		if (unlikely(error)) {
>  			acts = NULL;
> -			goto err_unlock_ovs;
> +			goto err_unlock_tbl;
>  		}
>  
>  		if (unlikely(reply)) {
> -			error = ovs_flow_cmd_fill_info(new_flow,
> +			error = ovs_flow_cmd_fill_info(new_flow, table,
>  						       ovs_header->dp_ifindex,
>  						       reply, info->snd_portid,
>  						       info->snd_seq, 0,
> @@ -1095,7 +1129,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  						       ufid_flags);
>  			BUG_ON(error < 0);
>  		}
> -		ovs_unlock();
> +		mutex_unlock(&table->lock);
> +		ovs_flow_tbl_put(table);
>  	} else {
>  		struct sw_flow_actions *old_acts;
>  
> @@ -1108,28 +1143,28 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
>  							 | NLM_F_EXCL))) {
>  			error = -EEXIST;
> -			goto err_unlock_ovs;
> +			goto err_unlock_tbl;
>  		}
>  		/* The flow identifier has to be the same for flow updates.
>  		 * Look for any overlapping flow.
>  		 */
>  		if (unlikely(!ovs_flow_cmp(flow, &match))) {
>  			if (ovs_identifier_is_key(&flow->id))
> -				flow = ovs_flow_tbl_lookup_exact(&dp->table,
> +				flow = ovs_flow_tbl_lookup_exact(table,
>  								 &match);
>  			else /* UFID matches but key is different */
>  				flow = NULL;
>  			if (!flow) {
>  				error = -ENOENT;
> -				goto err_unlock_ovs;
> +				goto err_unlock_tbl;
>  			}
>  		}
>  		/* Update actions. */
> -		old_acts = ovsl_dereference(flow->sf_acts);
> +		old_acts = ovs_tbl_dereference(flow->sf_acts, table);
>  		rcu_assign_pointer(flow->sf_acts, acts);
>  
>  		if (unlikely(reply)) {
> -			error = ovs_flow_cmd_fill_info(flow,
> +			error = ovs_flow_cmd_fill_info(flow, table,
>  						       ovs_header->dp_ifindex,
>  						       reply, info->snd_portid,
>  						       info->snd_seq, 0,
> @@ -1137,7 +1172,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  						       ufid_flags);
>  			BUG_ON(error < 0);
>  		}
> -		ovs_unlock();
> +		mutex_unlock(&table->lock);
> +		ovs_flow_tbl_put(table);
>  
>  		ovs_nla_free_flow_actions_rcu(old_acts);
>  		ovs_flow_free(new_flow, false);
> @@ -1149,8 +1185,10 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  	kfree(key);
>  	return 0;
>  
> -err_unlock_ovs:
> -	ovs_unlock();
> +err_unlock_tbl:
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
> +err_kfree_reply:
>  	kfree_skb(reply);
>  err_kfree_acts:
>  	ovs_nla_free_flow_actions(acts);
> @@ -1244,6 +1282,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
>  	struct net *net = sock_net(skb->sk);
>  	struct nlattr **a = info->attrs;
>  	struct ovs_header *ovs_header = genl_info_userhdr(info);
> +	struct flow_table *table;
>  	struct sw_flow_key key;
>  	struct sw_flow *flow;
>  	struct sk_buff *reply = NULL;
> @@ -1278,29 +1317,43 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
>  		}
>  	}
>  
> -	ovs_lock();
> +	rcu_read_lock();
>  	dp = get_dp(net, ovs_header->dp_ifindex);
>  	if (unlikely(!dp)) {
>  		error = -ENODEV;
> -		goto err_unlock_ovs;
> +		rcu_read_unlock();
> +		goto err_free_reply;
>  	}
> +	table = rcu_dereference(dp->table);
> +	if (!table || !ovs_flow_tbl_get(table)) {
> +		rcu_read_unlock();
> +		error = -ENODEV;
> +		goto err_free_reply;
> +	}
> +	rcu_read_unlock();
> +
> +	/* It is safe to dereference "table" after leaving rcu read-protected
> +	 * region because it's pinned by refcount.
> +	 */
> +	mutex_lock(&table->lock);
> +
>  	/* Check that the flow exists. */
>  	if (ufid_present)
> -		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
> +		flow = ovs_flow_tbl_lookup_ufid(table, &sfid);
>  	else
> -		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
> +		flow = ovs_flow_tbl_lookup_exact(table, &match);
>  	if (unlikely(!flow)) {
>  		error = -ENOENT;
> -		goto err_unlock_ovs;
> +		goto err_unlock_tbl;
>  	}
>  
>  	/* Update actions, if present. */
>  	if (likely(acts)) {
> -		old_acts = ovsl_dereference(flow->sf_acts);
> +		old_acts = ovs_tbl_dereference(flow->sf_acts, table);
>  		rcu_assign_pointer(flow->sf_acts, acts);
>  
>  		if (unlikely(reply)) {
> -			error = ovs_flow_cmd_fill_info(flow,
> +			error = ovs_flow_cmd_fill_info(flow, table,
>  						       ovs_header->dp_ifindex,
>  						       reply, info->snd_portid,
>  						       info->snd_seq, 0,
> @@ -1310,20 +1363,22 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
>  		}
>  	} else {
>  		/* Could not alloc without acts before locking. */
> -		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
> +		reply = ovs_flow_cmd_build_info(flow, table,
> +						ovs_header->dp_ifindex,
>  						info, OVS_FLOW_CMD_SET, false,
>  						ufid_flags);
>  
>  		if (IS_ERR(reply)) {
>  			error = PTR_ERR(reply);
> -			goto err_unlock_ovs;
> +			goto err_unlock_tbl;
>  		}
>  	}
>  
>  	/* Clear stats. */
>  	if (a[OVS_FLOW_ATTR_CLEAR])
> -		ovs_flow_stats_clear(flow);
> -	ovs_unlock();
> +		ovs_flow_stats_clear(flow, table);
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
>  
>  	if (reply)
>  		ovs_notify(&dp_flow_genl_family, reply, info);
> @@ -1332,8 +1387,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
>  
>  	return 0;
>  
> -err_unlock_ovs:
> -	ovs_unlock();
> +err_unlock_tbl:
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
> +err_free_reply:
>  	kfree_skb(reply);
>  err_kfree_acts:
>  	ovs_nla_free_flow_actions(acts);
> @@ -1346,6 +1403,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
>  	struct nlattr **a = info->attrs;
>  	struct ovs_header *ovs_header = genl_info_userhdr(info);
>  	struct net *net = sock_net(skb->sk);
> +	struct flow_table *table;
>  	struct sw_flow_key key;
>  	struct sk_buff *reply;
>  	struct sw_flow *flow;
> @@ -1370,33 +1428,48 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
>  	if (err)
>  		return err;
>  
> -	ovs_lock();
> +	rcu_read_lock();
>  	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
>  	if (!dp) {
> -		err = -ENODEV;
> -		goto unlock;
> +		rcu_read_unlock();
> +		return -ENODEV;
>  	}
> +	table = rcu_dereference(dp->table);
> +	if (!table || !ovs_flow_tbl_get(table)) {
> +		rcu_read_unlock();
> +		return -ENODEV;
> +	}
> +	rcu_read_unlock();
> +
> +	/* It is safe to dereference "table" after leaving rcu read-protected
> +	 * region because it's pinned by refcount.
> +	 */
> +	mutex_lock(&table->lock);
> +
>  
>  	if (ufid_present)
> -		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
> +		flow = ovs_flow_tbl_lookup_ufid(table, &ufid);
>  	else
> -		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
> +		flow = ovs_flow_tbl_lookup_exact(table, &match);
>  	if (!flow) {
>  		err = -ENOENT;
>  		goto unlock;
>  	}
>  
> -	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
> -					OVS_FLOW_CMD_GET, true, ufid_flags);
> +	reply = ovs_flow_cmd_build_info(flow, table, ovs_header->dp_ifindex,
> +					info, OVS_FLOW_CMD_GET, true,
> +					ufid_flags);
>  	if (IS_ERR(reply)) {
>  		err = PTR_ERR(reply);
>  		goto unlock;
>  	}
>  
> -	ovs_unlock();
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
>  	return genlmsg_reply(reply, info);
>  unlock:
> -	ovs_unlock();
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
>  	return err;
>  }
>  
> @@ -1405,6 +1478,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
>  	struct nlattr **a = info->attrs;
>  	struct ovs_header *ovs_header = genl_info_userhdr(info);
>  	struct net *net = sock_net(skb->sk);
> +	struct flow_table *table;
>  	struct sw_flow_key key;
>  	struct sk_buff *reply;
>  	struct sw_flow *flow = NULL;
> @@ -1425,36 +1499,49 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
>  			return err;
>  	}
>  
> -	ovs_lock();
> +	rcu_read_lock();
>  	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
>  	if (unlikely(!dp)) {
> -		err = -ENODEV;
> -		goto unlock;
> +		rcu_read_unlock();
> +		return -ENODEV;
>  	}
> +	table = rcu_dereference(dp->table);
> +	if (!table || !ovs_flow_tbl_get(table)) {
> +		rcu_read_unlock();
> +		return -ENODEV;
> +	}
> +	rcu_read_unlock();
> +
> +	/* It is safe to dereference "table" after leaving rcu read-protected
> +	 * region because it's pinned by refcount.
> +	 */
> +	mutex_lock(&table->lock);
> +
>  
>  	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
> -		err = ovs_flow_tbl_flush(&dp->table);
> +		err = ovs_flow_tbl_flush(table);
>  		goto unlock;
>  	}
>  
>  	if (ufid_present)
> -		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
> +		flow = ovs_flow_tbl_lookup_ufid(table, &ufid);
>  	else
> -		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
> +		flow = ovs_flow_tbl_lookup_exact(table, &match);
>  	if (unlikely(!flow)) {
>  		err = -ENOENT;
>  		goto unlock;
>  	}
>  
> -	ovs_flow_tbl_remove(&dp->table, flow);
> -	ovs_unlock();
> +	ovs_flow_tbl_remove(table, flow);
> +	mutex_unlock(&table->lock);
>  
>  	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
>  					&flow->id, info, false, ufid_flags);
>  	if (likely(reply)) {
>  		if (!IS_ERR(reply)) {
>  			rcu_read_lock();	/*To keep RCU checker happy. */
> -			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
> +			err = ovs_flow_cmd_fill_info(flow, table,
> +						     ovs_header->dp_ifindex,
>  						     reply, info->snd_portid,
>  						     info->snd_seq, 0,
>  						     OVS_FLOW_CMD_DEL,
> @@ -1473,10 +1560,12 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
>  	}
>  
>  out_free:
> +	ovs_flow_tbl_put(table);
>  	ovs_flow_free(flow, true);
>  	return 0;
>  unlock:
> -	ovs_unlock();
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
>  	return err;
>  }
>  
> @@ -1485,6 +1574,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  	struct nlattr *a[__OVS_FLOW_ATTR_MAX];
>  	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
>  	struct table_instance *ti;
> +	struct flow_table *table;
>  	struct datapath *dp;
>  	u32 ufid_flags;
>  	int err;
> @@ -1501,8 +1591,13 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  		rcu_read_unlock();
>  		return -ENODEV;
>  	}
> +	table = rcu_dereference(dp->table);
> +	if (!table) {
> +		rcu_read_unlock();
> +		return -ENODEV;
> +	}
>  
> -	ti = rcu_dereference(dp->table.ti);
> +	ti = rcu_dereference(table->ti);
>  	for (;;) {
>  		struct sw_flow *flow;
>  		u32 bucket, obj;
> @@ -1513,8 +1608,8 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  		if (!flow)
>  			break;
>  
> -		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
> -					   NETLINK_CB(cb->skb).portid,
> +		if (ovs_flow_cmd_fill_info(flow, table, ovs_header->dp_ifindex,
> +					   skb, NETLINK_CB(cb->skb).portid,
>  					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
>  					   OVS_FLOW_CMD_GET, ufid_flags) < 0)
>  			break;
> @@ -1598,8 +1693,13 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
>  	struct ovs_dp_stats dp_stats;
>  	struct ovs_dp_megaflow_stats dp_megaflow_stats;
>  	struct dp_nlsk_pids *pids = ovsl_dereference(dp->upcall_portids);
> +	struct flow_table *table;
>  	int err, pids_len;
>  
> +	table = ovsl_dereference(dp->table);
> +	if (!table)
> +		return -ENODEV;
> +
>  	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
>  				 flags, cmd);
>  	if (!ovs_header)
> @@ -1625,7 +1725,7 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
>  		goto nla_put_failure;
>  
>  	if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
> -			ovs_flow_tbl_masks_cache_size(&dp->table)))
> +			ovs_flow_tbl_masks_cache_size(table)))
>  		goto nla_put_failure;
>  
>  	if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) {
> @@ -1736,6 +1836,7 @@ u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
>  static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
>  {
>  	u32 user_features = 0, old_features = dp->user_features;
> +	struct flow_table *table;
>  	int err;
>  
>  	if (a[OVS_DP_ATTR_USER_FEATURES]) {
> @@ -1757,8 +1858,12 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
>  		int err;
>  		u32 cache_size;
>  
> +		table = ovsl_dereference(dp->table);
> +		if (!table)
> +			return -ENODEV;
> +
>  		cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
> -		err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
> +		err = ovs_flow_tbl_masks_cache_resize(table, cache_size);
>  		if (err)
>  			return err;
>  	}
> @@ -1810,6 +1915,7 @@ static int ovs_dp_vport_init(struct datapath *dp)
>  static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  {
>  	struct nlattr **a = info->attrs;
> +	struct flow_table *table;
>  	struct vport_parms parms;
>  	struct sk_buff *reply;
>  	struct datapath *dp;
> @@ -1833,9 +1939,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  	ovs_dp_set_net(dp, sock_net(skb->sk));
>  
>  	/* Allocate table. */
> -	err = ovs_flow_tbl_init(&dp->table);
> -	if (err)
> +	table = ovs_flow_tbl_alloc();
> +	if (IS_ERR(table)) {
> +		err = PTR_ERR(table);
>  		goto err_destroy_dp;
> +	}
> +	rcu_assign_pointer(dp->table, table);
>  
>  	err = ovs_dp_stats_init(dp);
>  	if (err)
> @@ -1905,7 +2014,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  err_destroy_stats:
>  	free_percpu(dp->stats_percpu);
>  err_destroy_table:
> -	ovs_flow_tbl_destroy(&dp->table);
> +	ovs_flow_tbl_put(table);
>  err_destroy_dp:
>  	kfree(dp);
>  err_destroy_reply:
> @@ -1917,7 +2026,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  /* Called with ovs_mutex. */
>  static void __dp_destroy(struct datapath *dp)
>  {
> -	struct flow_table *table = &dp->table;
> +	struct flow_table *table = rcu_dereference_protected(dp->table,
> +					lockdep_ovsl_is_held());
>  	int i;
>  
>  	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
> @@ -1939,14 +2049,10 @@ static void __dp_destroy(struct datapath *dp)
>  	 */
>  	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
>  
> -	/* Flush sw_flow in the tables. RCU cb only releases resource
> -	 * such as dp, ports and tables. That may avoid some issues
> -	 * such as RCU usage warning.
> -	 */
> -	table_instance_flow_flush(table, ovsl_dereference(table->ti),
> -				  ovsl_dereference(table->ufid_ti));
> +	rcu_assign_pointer(dp->table, NULL);
> +	ovs_flow_tbl_put(table);
>  
> -	/* RCU destroy the ports, meters and flow tables. */
> +	/* RCU destroy the ports and meters. */
>  	call_rcu(&dp->rcu, destroy_dp_rcu);
>  }
>  
> @@ -2554,13 +2660,18 @@ static void ovs_dp_masks_rebalance(struct work_struct *work)
>  {
>  	struct ovs_net *ovs_net = container_of(work, struct ovs_net,
>  					       masks_rebalance.work);
> +	struct flow_table *table;
>  	struct datapath *dp;
>  
>  	ovs_lock();
> -
> -	list_for_each_entry(dp, &ovs_net->dps, list_node)
> -		ovs_flow_masks_rebalance(&dp->table);
> -
> +	list_for_each_entry(dp, &ovs_net->dps, list_node) {
> +		table = ovsl_dereference(dp->table);
> +		if (!table)
> +			continue;

Should we take a reference on the table here?  I guess it's kind of safe
because of the ovs_lock() above, but if that ever gets removed, it's possible
someone misses that there isn't a refcount pin here (everywhere else has
an ovs_flow_tbl_get() before it).

> +		mutex_lock(&table->lock);
> +		ovs_flow_masks_rebalance(table);
> +		mutex_unlock(&table->lock);
> +	}
>  	ovs_unlock();
>  
>  	schedule_delayed_work(&ovs_net->masks_rebalance,
> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> index db0c3e69d66c..44773bf9f645 100644
> --- a/net/openvswitch/datapath.h
> +++ b/net/openvswitch/datapath.h
> @@ -90,7 +90,7 @@ struct datapath {
>  	struct list_head list_node;
>  
>  	/* Flow table. */
> -	struct flow_table table;
> +	struct flow_table __rcu *table;
>  
>  	/* Switch ports. */
>  	struct hlist_head *ports;
> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> index 66366982f604..0a748cf20f53 100644
> --- a/net/openvswitch/flow.c
> +++ b/net/openvswitch/flow.c
> @@ -124,8 +124,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
>  	spin_unlock(&stats->lock);
>  }
>  
> -/* Must be called with rcu_read_lock or ovs_mutex. */
> +/* Must be called with rcu_read_lock or table->lock held. */
>  void ovs_flow_stats_get(const struct sw_flow *flow,
> +			const struct flow_table *table,
>  			struct ovs_flow_stats *ovs_stats,
>  			unsigned long *used, __be16 *tcp_flags)
>  {
> @@ -136,7 +137,8 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
>  	memset(ovs_stats, 0, sizeof(*ovs_stats));
>  
>  	for_each_cpu(cpu, flow->cpu_used_mask) {
> -		struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
> +		struct sw_flow_stats *stats =
> +			rcu_dereference_ovs_tbl(flow->stats[cpu], table);
>  
>  		if (stats) {
>  			/* Local CPU may write on non-local stats, so we must
> @@ -153,13 +155,14 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
>  	}
>  }
>  
> -/* Called with ovs_mutex. */
> -void ovs_flow_stats_clear(struct sw_flow *flow)
> +/* Called with table->lock held. */
> +void ovs_flow_stats_clear(struct sw_flow *flow, struct flow_table *table)
>  {
>  	unsigned int cpu;
>  
>  	for_each_cpu(cpu, flow->cpu_used_mask) {
> -		struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
> +		struct sw_flow_stats *stats =
> +			ovs_tbl_dereference(flow->stats[cpu], table);
>  
>  		if (stats) {
>  			spin_lock_bh(&stats->lock);
> diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
> index b5711aff6e76..e05ed6796e4e 100644
> --- a/net/openvswitch/flow.h
> +++ b/net/openvswitch/flow.h
> @@ -23,6 +23,7 @@
>  #include <net/dst_metadata.h>
>  #include <net/nsh.h>
>  
> +struct flow_table;
>  struct sk_buff;
>  
>  enum sw_flow_mac_proto {
> @@ -280,9 +281,11 @@ static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid)
>  
>  void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
>  			   const struct sk_buff *);
> -void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
> -			unsigned long *used, __be16 *tcp_flags);
> -void ovs_flow_stats_clear(struct sw_flow *);
> +void ovs_flow_stats_get(const struct sw_flow *flow,
> +			const struct flow_table *table,
> +			struct ovs_flow_stats *stats, unsigned long *used,
> +			__be16 *tcp_flags);
> +void ovs_flow_stats_clear(struct sw_flow *flow, struct flow_table *table);
>  u64 ovs_flow_used_time(unsigned long flow_jiffies);
>  
>  int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
> diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
> index 61c6a5f77c2e..d9dbe4b4807c 100644
> --- a/net/openvswitch/flow_table.c
> +++ b/net/openvswitch/flow_table.c
> @@ -45,6 +45,16 @@
>  static struct kmem_cache *flow_cache;
>  struct kmem_cache *flow_stats_cache __read_mostly;
>  
> +#ifdef CONFIG_LOCKDEP
> +int lockdep_ovs_tbl_is_held(const struct flow_table *table)
> +{
> +	if (debug_locks)
> +		return lockdep_is_held(&table->lock);
> +	else
> +		return 1;
> +}
> +#endif
> +
>  static u16 range_n_bytes(const struct sw_flow_key_range *range)
>  {
>  	return range->end - range->start;
> @@ -249,12 +259,12 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>  	if (!new)
>  		return -ENOMEM;
>  
> -	old = ovsl_dereference(tbl->mask_array);
> +	old = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	if (old) {
>  		int i;
>  
>  		for (i = 0; i < old->max; i++) {
> -			if (ovsl_dereference(old->masks[i]))
> +			if (ovs_tbl_dereference(old->masks[i], tbl))
>  				new->masks[new->count++] = old->masks[i];
>  		}
>  		call_rcu(&old->rcu, mask_array_rcu_cb);
> @@ -268,7 +278,7 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>  static int tbl_mask_array_add_mask(struct flow_table *tbl,
>  				   struct sw_flow_mask *new)
>  {
> -	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> +	struct mask_array *ma = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	int err, ma_count = READ_ONCE(ma->count);
>  
>  	if (ma_count >= ma->max) {
> @@ -277,7 +287,7 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,
>  		if (err)
>  			return err;
>  
> -		ma = ovsl_dereference(tbl->mask_array);
> +		ma = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	} else {
>  		/* On every add or delete we need to reset the counters so
>  		 * every new mask gets a fair chance of being prioritized.
> @@ -285,7 +295,7 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,
>  		tbl_mask_array_reset_counters(ma);
>  	}
>  
> -	BUG_ON(ovsl_dereference(ma->masks[ma_count]));
> +	WARN_ON_ONCE(ovs_tbl_dereference(ma->masks[ma_count], tbl));
>  
>  	rcu_assign_pointer(ma->masks[ma_count], new);
>  	WRITE_ONCE(ma->count, ma_count + 1);
> @@ -296,12 +306,12 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,
>  static void tbl_mask_array_del_mask(struct flow_table *tbl,
>  				    struct sw_flow_mask *mask)
>  {
> -	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> +	struct mask_array *ma = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	int i, ma_count = READ_ONCE(ma->count);
>  
>  	/* Remove the deleted mask pointers from the array */
>  	for (i = 0; i < ma_count; i++) {
> -		if (mask == ovsl_dereference(ma->masks[i]))
> +		if (mask == ovs_tbl_dereference(ma->masks[i], tbl))
>  			goto found;
>  	}
>  
> @@ -329,10 +339,10 @@ static void tbl_mask_array_del_mask(struct flow_table *tbl,
>  static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
>  {
>  	if (mask) {
> -		/* ovs-lock is required to protect mask-refcount and
> +		/* table lock is required to protect mask-refcount and
>  		 * mask list.
>  		 */
> -		ASSERT_OVSL();
> +		ASSERT_OVS_TBL(tbl);
>  		BUG_ON(!mask->ref_count);
>  		mask->ref_count--;
>  
> @@ -386,7 +396,8 @@ static struct mask_cache *tbl_mask_cache_alloc(u32 size)
>  }
>  int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
>  {
> -	struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache);
> +	struct mask_cache *mc = rcu_dereference_ovs_tbl(table->mask_cache,
> +							table);
>  	struct mask_cache *new;
>  
>  	if (size == mc->cache_size)
> @@ -406,15 +417,23 @@ int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
>  	return 0;
>  }
>  
> -int ovs_flow_tbl_init(struct flow_table *table)
> +struct flow_table *ovs_flow_tbl_alloc(void)
>  {
>  	struct table_instance *ti, *ufid_ti;
> +	struct flow_table *table;
>  	struct mask_cache *mc;
>  	struct mask_array *ma;
>  
> +	table = kzalloc_obj(*table, GFP_KERNEL);
> +	if (!table)
> +		return ERR_PTR(-ENOMEM);
> +
> +	mutex_init(&table->lock);
> +	refcount_set(&table->refcnt, 1);
> +
>  	mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
>  	if (!mc)
> -		return -ENOMEM;
> +		goto free_table;
>  
>  	ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
>  	if (!ma)
> @@ -435,7 +454,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>  	table->last_rehash = jiffies;
>  	table->count = 0;
>  	table->ufid_count = 0;
> -	return 0;
> +	return table;
>  
>  free_ti:
>  	__table_instance_destroy(ti);
> @@ -443,7 +462,10 @@ int ovs_flow_tbl_init(struct flow_table *table)
>  	__mask_array_destroy(ma);
>  free_mask_cache:
>  	__mask_cache_destroy(mc);
> -	return -ENOMEM;
> +free_table:
> +	mutex_destroy(&table->lock);
> +	kfree(table);
> +	return ERR_PTR(-ENOMEM);
>  }
>  
>  static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
> @@ -470,7 +492,7 @@ static void table_instance_flow_free(struct flow_table *table,
>  	flow_mask_remove(table, flow->mask);
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table mutex held. */
>  void table_instance_flow_flush(struct flow_table *table,
>  			       struct table_instance *ti,
>  			       struct table_instance *ufid_ti)
> @@ -505,11 +527,11 @@ static void table_instance_destroy(struct table_instance *ti,
>  	call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
>  }
>  
> -/* No need for locking this function is called from RCU callback or
> - * error path.
> - */
> -void ovs_flow_tbl_destroy(struct flow_table *table)
> +/* No need for locking this function is called from RCU callback. */
> +static void ovs_flow_tbl_destroy_rcu(struct rcu_head *rcu)
>  {
> +	struct flow_table *table = container_of(rcu, struct flow_table, rcu);
> +
>  	struct table_instance *ti = rcu_dereference_raw(table->ti);
>  	struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
>  	struct mask_cache *mc = rcu_dereference_raw(table->mask_cache);
> @@ -518,6 +540,20 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
>  	call_rcu(&mc->rcu, mask_cache_rcu_cb);
>  	call_rcu(&ma->rcu, mask_array_rcu_cb);
>  	table_instance_destroy(ti, ufid_ti);
> +	mutex_destroy(&table->lock);
> +	kfree(table);
> +}
> +
> +void ovs_flow_tbl_put(struct flow_table *table)
> +{
> +	if (refcount_dec_and_test(&table->refcnt)) {
> +		mutex_lock(&table->lock);
> +		table_instance_flow_flush(table,
> +					  ovs_tbl_dereference(table->ti, table),
> +					  ovs_tbl_dereference(table->ufid_ti, table));
> +		mutex_unlock(&table->lock);
> +		call_rcu(&table->rcu, ovs_flow_tbl_destroy_rcu);
> +	}
>  }
>  
>  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
> @@ -571,7 +607,8 @@ static void ufid_table_instance_insert(struct table_instance *ti,
>  	hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head);
>  }
>  
> -static void flow_table_copy_flows(struct table_instance *old,
> +static void flow_table_copy_flows(struct flow_table *table,
> +				  struct table_instance *old,
>  				  struct table_instance *new, bool ufid)
>  {
>  	int old_ver;
> @@ -588,17 +625,18 @@ static void flow_table_copy_flows(struct table_instance *old,
>  		if (ufid)
>  			hlist_for_each_entry_rcu(flow, head,
>  						 ufid_table.node[old_ver],
> -						 lockdep_ovsl_is_held())
> +						 lockdep_ovs_tbl_is_held(table))
>  				ufid_table_instance_insert(new, flow);
>  		else
>  			hlist_for_each_entry_rcu(flow, head,
>  						 flow_table.node[old_ver],
> -						 lockdep_ovsl_is_held())
> +						 lockdep_ovs_tbl_is_held(table))
>  				table_instance_insert(new, flow);
>  	}
>  }
>  
> -static struct table_instance *table_instance_rehash(struct table_instance *ti,
> +static struct table_instance *table_instance_rehash(struct flow_table *table,
> +						    struct table_instance *ti,
>  						    int n_buckets, bool ufid)
>  {
>  	struct table_instance *new_ti;
> @@ -607,16 +645,19 @@ static struct table_instance *table_instance_rehash(struct table_instance *ti,
>  	if (!new_ti)
>  		return NULL;
>  
> -	flow_table_copy_flows(ti, new_ti, ufid);
> +	flow_table_copy_flows(table, ti, new_ti, ufid);
>  
>  	return new_ti;
>  }
>  
> +/* Must be called with flow_table->lock held. */
>  int ovs_flow_tbl_flush(struct flow_table *flow_table)
>  {
>  	struct table_instance *old_ti, *new_ti;
>  	struct table_instance *old_ufid_ti, *new_ufid_ti;
>  
> +	ASSERT_OVS_TBL(flow_table);
> +
>  	new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
>  	if (!new_ti)
>  		return -ENOMEM;
> @@ -624,8 +665,8 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
>  	if (!new_ufid_ti)
>  		goto err_free_ti;
>  
> -	old_ti = ovsl_dereference(flow_table->ti);
> -	old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);
> +	old_ti = ovs_tbl_dereference(flow_table->ti, flow_table);
> +	old_ufid_ti = ovs_tbl_dereference(flow_table->ufid_ti, flow_table);
>  
>  	rcu_assign_pointer(flow_table->ti, new_ti);
>  	rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
> @@ -693,7 +734,8 @@ static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
>  	return cmp_key(flow->id.unmasked_key, key, key_start, key_end);
>  }
>  
> -static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
> +static struct sw_flow *masked_flow_lookup(struct flow_table *tbl,
> +					  struct table_instance *ti,
>  					  const struct sw_flow_key *unmasked,
>  					  const struct sw_flow_mask *mask,
>  					  u32 *n_mask_hit)
> @@ -709,7 +751,7 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
>  	(*n_mask_hit)++;
>  
>  	hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver],
> -				 lockdep_ovsl_is_held()) {
> +				 lockdep_ovs_tbl_is_held(tbl)) {
>  		if (flow->mask == mask && flow->flow_table.hash == hash &&
>  		    flow_cmp_masked_key(flow, &masked_key, &mask->range))
>  			return flow;
> @@ -736,9 +778,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>  	int i;
>  
>  	if (likely(*index < ma->max)) {
> -		mask = rcu_dereference_ovsl(ma->masks[*index]);
> +		mask = rcu_dereference_ovs_tbl(ma->masks[*index], tbl);
>  		if (mask) {
> -			flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
> +			flow = masked_flow_lookup(tbl, ti, key, mask, n_mask_hit);
>  			if (flow) {
>  				u64_stats_update_begin(&stats->syncp);
>  				stats->usage_cntrs[*index]++;
> @@ -754,11 +796,11 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>  		if (i == *index)
>  			continue;
>  
> -		mask = rcu_dereference_ovsl(ma->masks[i]);
> +		mask = rcu_dereference_ovs_tbl(ma->masks[i], tbl);
>  		if (unlikely(!mask))
>  			break;
>  
> -		flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
> +		flow = masked_flow_lookup(tbl, ti, key, mask, n_mask_hit);
>  		if (flow) { /* Found */
>  			*index = i;
>  			u64_stats_update_begin(&stats->syncp);
> @@ -845,8 +887,8 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>  struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
>  				    const struct sw_flow_key *key)
>  {
> -	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
> -	struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
> +	struct table_instance *ti = rcu_dereference_ovs_tbl(tbl->ti, tbl);
> +	struct mask_array *ma = rcu_dereference_ovs_tbl(tbl->mask_array, tbl);
>  	u32 __always_unused n_mask_hit;
>  	u32 __always_unused n_cache_hit;
>  	struct sw_flow *flow;
> @@ -865,21 +907,22 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
>  struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
>  					  const struct sw_flow_match *match)
>  {
> -	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> +	struct mask_array *ma = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	int i;
>  
> -	/* Always called under ovs-mutex. */
> +	/* Always called under tbl->lock. */
>  	for (i = 0; i < ma->max; i++) {
> -		struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
> +		struct table_instance *ti =
> +				rcu_dereference_ovs_tbl(tbl->ti, tbl);
>  		u32 __always_unused n_mask_hit;
>  		struct sw_flow_mask *mask;
>  		struct sw_flow *flow;
>  
> -		mask = ovsl_dereference(ma->masks[i]);
> +		mask = ovs_tbl_dereference(ma->masks[i], tbl);
>  		if (!mask)
>  			continue;
>  
> -		flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
> +		flow = masked_flow_lookup(tbl, ti, match->key, mask, &n_mask_hit);
>  		if (flow && ovs_identifier_is_key(&flow->id) &&
>  		    ovs_flow_cmp_unmasked_key(flow, match)) {
>  			return flow;
> @@ -915,7 +958,7 @@ bool ovs_flow_cmp(const struct sw_flow *flow,
>  struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
>  					 const struct sw_flow_id *ufid)
>  {
> -	struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti);
> +	struct table_instance *ti = rcu_dereference_ovs_tbl(tbl->ufid_ti, tbl);
>  	struct sw_flow *flow;
>  	struct hlist_head *head;
>  	u32 hash;
> @@ -923,7 +966,7 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
>  	hash = ufid_hash(ufid);
>  	head = find_bucket(ti, hash);
>  	hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver],
> -				 lockdep_ovsl_is_held()) {
> +				 lockdep_ovs_tbl_is_held(tbl)) {
>  		if (flow->ufid_table.hash == hash &&
>  		    ovs_flow_cmp_ufid(flow, ufid))
>  			return flow;
> @@ -933,28 +976,33 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
>  
>  int ovs_flow_tbl_num_masks(const struct flow_table *table)
>  {
> -	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
> +	struct mask_array *ma = rcu_dereference_ovs_tbl(table->mask_array,
> +							table);
>  	return READ_ONCE(ma->count);
>  }
>  
>  u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
>  {
> -	struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache);
> +	struct mask_cache *mc = rcu_dereference_ovs_tbl(table->mask_cache,
> +							table);
>  
>  	return READ_ONCE(mc->cache_size);
>  }
>  
> -static struct table_instance *table_instance_expand(struct table_instance *ti,
> +static struct table_instance *table_instance_expand(struct flow_table *table,
> +						    struct table_instance *ti,
>  						    bool ufid)
>  {
> -	return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
> +	return table_instance_rehash(table, ti, ti->n_buckets * 2, ufid);
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table mutex held. */
>  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
>  {
> -	struct table_instance *ti = ovsl_dereference(table->ti);
> -	struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
> +	struct table_instance *ti = ovs_tbl_dereference(table->ti,
> +							table);
> +	struct table_instance *ufid_ti = ovs_tbl_dereference(table->ufid_ti,
> +							     table);
>  
>  	BUG_ON(table->count == 0);
>  	table_instance_flow_free(table, ti, ufid_ti, flow);
> @@ -988,10 +1036,10 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
>  	struct mask_array *ma;
>  	int i;
>  
> -	ma = ovsl_dereference(tbl->mask_array);
> +	ma = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	for (i = 0; i < ma->max; i++) {
>  		struct sw_flow_mask *t;
> -		t = ovsl_dereference(ma->masks[i]);
> +		t = ovs_tbl_dereference(ma->masks[i], tbl);
>  
>  		if (t && mask_equal(mask, t))
>  			return t;
> @@ -1029,22 +1077,25 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
>  	return 0;
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table mutex held. */
>  static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
>  {
>  	struct table_instance *new_ti = NULL;
>  	struct table_instance *ti;
>  
> +	ASSERT_OVS_TBL(table);
> +
>  	flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range);
> -	ti = ovsl_dereference(table->ti);
> +	ti = ovs_tbl_dereference(table->ti, table);
>  	table_instance_insert(ti, flow);
>  	table->count++;
>  
>  	/* Expand table, if necessary, to make room. */
>  	if (table->count > ti->n_buckets)
> -		new_ti = table_instance_expand(ti, false);
> +		new_ti = table_instance_expand(table, ti, false);
>  	else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
> -		new_ti = table_instance_rehash(ti, ti->n_buckets, false);
> +		new_ti = table_instance_rehash(table, ti, ti->n_buckets,
> +					       false);
>  
>  	if (new_ti) {
>  		rcu_assign_pointer(table->ti, new_ti);
> @@ -1053,13 +1104,15 @@ static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
>  	}
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table mutex held. */
>  static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
>  {
>  	struct table_instance *ti;
>  
> +	ASSERT_OVS_TBL(table);
> +
>  	flow->ufid_table.hash = ufid_hash(&flow->id);
> -	ti = ovsl_dereference(table->ufid_ti);
> +	ti = ovs_tbl_dereference(table->ufid_ti, table);
>  	ufid_table_instance_insert(ti, flow);
>  	table->ufid_count++;
>  
> @@ -1067,7 +1120,7 @@ static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
>  	if (table->ufid_count > ti->n_buckets) {
>  		struct table_instance *new_ti;
>  
> -		new_ti = table_instance_expand(ti, true);
> +		new_ti = table_instance_expand(table, ti, true);
>  		if (new_ti) {
>  			rcu_assign_pointer(table->ufid_ti, new_ti);
>  			call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
> @@ -1075,12 +1128,14 @@ static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
>  	}
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table mutex held. */
>  int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
>  			const struct sw_flow_mask *mask)
>  {
>  	int err;
>  
> +	ASSERT_OVS_TBL(table);
> +
>  	err = flow_mask_insert(table, flow, mask);
>  	if (err)
>  		return err;
> @@ -1099,10 +1154,11 @@ static int compare_mask_and_count(const void *a, const void *b)
>  	return (s64)mc_b->counter - (s64)mc_a->counter;
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table->lock held. */
>  void ovs_flow_masks_rebalance(struct flow_table *table)
>  {
> -	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
> +	struct mask_array *ma = rcu_dereference_ovs_tbl(table->mask_array,
> +							table);
>  	struct mask_count *masks_and_count;
>  	struct mask_array *new;
>  	int masks_entries = 0;
> @@ -1117,7 +1173,7 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
>  		struct sw_flow_mask *mask;
>  		int cpu;
>  
> -		mask = rcu_dereference_ovsl(ma->masks[i]);
> +		mask = rcu_dereference_ovs_tbl(ma->masks[i], table);
>  		if (unlikely(!mask))
>  			break;
>  
> @@ -1171,7 +1227,7 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
>  	for (i = 0; i < masks_entries; i++) {
>  		int index = masks_and_count[i].index;
>  
> -		if (ovsl_dereference(ma->masks[index]))
> +		if (ovs_tbl_dereference(ma->masks[index], table))
>  			new->masks[new->count++] = ma->masks[index];
>  	}
>  
> diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
> index f524dc3e4862..cffd412c9045 100644
> --- a/net/openvswitch/flow_table.h
> +++ b/net/openvswitch/flow_table.h
> @@ -59,7 +59,29 @@ struct table_instance {
>  	u32 hash_seed;
>  };
>  
> +/* Locking:
> + *
> + * flow_table is _not_ protected by ovs_lock (see comment above ovs_mutex
> + * in datapath.c).
> + *
> + * All writes to flow_table are protected by the embedded "lock".
> + * In order to ensure datapath destruction does not trigger the destruction
> + * of the flow_table, "refcnt" is used. Therefore, writers must:
> + * 1 - Enter rcu read-protected section
> + * 2 - Increase "table->refcnt"
> + * 3 - Leave rcu read-protected section (to avoid using mutexes inside rcu)
> + * 4 - Lock "table->lock"
> + * 5 - Perform modifications
> + * 6 - Release "table->lock"
> + * 7 - Decrease "table->refcnt"
> + *
> + * Reads are protected by RCU.
> + */
>  struct flow_table {
> +	/* Locks flow table writes. */
> +	struct mutex lock;
> +	refcount_t refcnt;
> +	struct rcu_head rcu;
>  	struct table_instance __rcu *ti;
>  	struct table_instance __rcu *ufid_ti;
>  	struct mask_cache __rcu *mask_cache;
> @@ -71,15 +93,40 @@ struct flow_table {
>  
>  extern struct kmem_cache *flow_stats_cache;
>  
> +#ifdef CONFIG_LOCKDEP
> +int lockdep_ovs_tbl_is_held(const struct flow_table *table);
> +#else
> +static inline int lockdep_ovs_tbl_is_held(const struct flow_table *table)
> +{
> +	(void)table;
> +	return 1;
> +}
> +#endif
> +
> +#define ASSERT_OVS_TBL(tbl)   WARN_ON(!lockdep_ovs_tbl_is_held(tbl))
> +
> +/* Lock-protected update-allowed dereferences.*/
> +#define ovs_tbl_dereference(p, tbl)	\
> +	rcu_dereference_protected(p, lockdep_ovs_tbl_is_held(tbl))
> +
> +/* Read dereferences can be protected by either RCU, table lock or ovs_mutex. */
> +#define rcu_dereference_ovs_tbl(p, tbl) \
> +	rcu_dereference_check(p,		\
> +		lockdep_ovs_tbl_is_held(tbl) || lockdep_ovsl_is_held())
> +
>  int ovs_flow_init(void);
>  void ovs_flow_exit(void);
>  
>  struct sw_flow *ovs_flow_alloc(void);
>  void ovs_flow_free(struct sw_flow *, bool deferred);
>  
> -int ovs_flow_tbl_init(struct flow_table *);
> +struct flow_table *ovs_flow_tbl_alloc(void);
> +void ovs_flow_tbl_put(struct flow_table *table);
> +static inline bool ovs_flow_tbl_get(struct flow_table *table)
> +{
> +	return refcount_inc_not_zero(&table->refcnt);
> +}
>  int ovs_flow_tbl_count(const struct flow_table *table);
> -void ovs_flow_tbl_destroy(struct flow_table *table);
>  int ovs_flow_tbl_flush(struct flow_table *flow_table);
>  
>  int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,


^ permalink raw reply

* [PATCH v3 net-next 13/15] net/sched: sch_cake: annotate data-races in cake_dump_stats()
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet, Toke Høiland-Jørgensen
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

cake_dump_stats() and cake_dump_class_stats() run without qdisc
spinlock being held.

Add READ_ONCE()/WRITE_ONCE() annotations.

Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: "Toke Høiland-Jørgensen" <toke@toke.dk>
---
 net/sched/sch_cake.c | 404 ++++++++++++++++++++++++-------------------
 1 file changed, 225 insertions(+), 179 deletions(-)

diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 32e672820c00a88c6d8fe77a6308405e016525ea..f523f0aa4d830e9d3ec4d43bb123e1dc4f8f289d 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -399,14 +399,14 @@ static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust);
  * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
  */
 
-static void cobalt_newton_step(struct cobalt_vars *vars)
+static void cobalt_newton_step(struct cobalt_vars *vars, u32 count)
 {
 	u32 invsqrt, invsqrt2;
 	u64 val;
 
 	invsqrt = vars->rec_inv_sqrt;
 	invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
-	val = (3LL << 32) - ((u64)vars->count * invsqrt2);
+	val = (3LL << 32) - ((u64)count * invsqrt2);
 
 	val >>= 2; /* avoid overflow in following multiply */
 	val = (val * invsqrt) >> (32 - 2 + 1);
@@ -414,12 +414,12 @@ static void cobalt_newton_step(struct cobalt_vars *vars)
 	vars->rec_inv_sqrt = val;
 }
 
-static void cobalt_invsqrt(struct cobalt_vars *vars)
+static void cobalt_invsqrt(struct cobalt_vars *vars, u32 count)
 {
-	if (vars->count < REC_INV_SQRT_CACHE)
-		vars->rec_inv_sqrt = inv_sqrt_cache[vars->count];
+	if (count < REC_INV_SQRT_CACHE)
+		vars->rec_inv_sqrt = inv_sqrt_cache[count];
 	else
-		cobalt_newton_step(vars);
+		cobalt_newton_step(vars, count);
 }
 
 static void cobalt_vars_init(struct cobalt_vars *vars)
@@ -449,16 +449,19 @@ static bool cobalt_queue_full(struct cobalt_vars *vars,
 	bool up = false;
 
 	if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
-		up = !vars->p_drop;
-		vars->p_drop += p->p_inc;
-		if (vars->p_drop < p->p_inc)
-			vars->p_drop = ~0;
-		vars->blue_timer = now;
-	}
-	vars->dropping = true;
-	vars->drop_next = now;
+		u32 p_drop = vars->p_drop;
+
+		up = !p_drop;
+		p_drop += p->p_inc;
+		if (p_drop < p->p_inc)
+			p_drop = ~0;
+		WRITE_ONCE(vars->p_drop, p_drop);
+		WRITE_ONCE(vars->blue_timer, now);
+	}
+	WRITE_ONCE(vars->dropping, true);
+	WRITE_ONCE(vars->drop_next, now);
 	if (!vars->count)
-		vars->count = 1;
+		WRITE_ONCE(vars->count, 1);
 
 	return up;
 }
@@ -474,21 +477,25 @@ static bool cobalt_queue_empty(struct cobalt_vars *vars,
 
 	if (vars->p_drop &&
 	    ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
-		if (vars->p_drop < p->p_dec)
-			vars->p_drop = 0;
+		u32 p_drop = vars->p_drop;
+
+		if (p_drop < p->p_dec)
+			p_drop = 0;
 		else
-			vars->p_drop -= p->p_dec;
-		vars->blue_timer = now;
-		down = !vars->p_drop;
+			p_drop -= p->p_dec;
+		WRITE_ONCE(vars->p_drop, p_drop);
+		WRITE_ONCE(vars->blue_timer, now);
+		down = !p_drop;
 	}
-	vars->dropping = false;
+	WRITE_ONCE(vars->dropping, false);
 
 	if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
-		vars->count--;
-		cobalt_invsqrt(vars);
-		vars->drop_next = cobalt_control(vars->drop_next,
-						 p->interval,
-						 vars->rec_inv_sqrt);
+		WRITE_ONCE(vars->count, vars->count - 1);
+		cobalt_invsqrt(vars, vars->count);
+		WRITE_ONCE(vars->drop_next,
+			   cobalt_control(vars->drop_next,
+					  p->interval,
+					  vars->rec_inv_sqrt));
 	}
 
 	return down;
@@ -507,6 +514,7 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars,
 	bool next_due, over_target;
 	ktime_t schedule;
 	u64 sojourn;
+	u32 count;
 
 /* The 'schedule' variable records, in its sign, whether 'now' is before or
  * after 'drop_next'.  This allows 'drop_next' to be updated before the next
@@ -528,45 +536,50 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars,
 	over_target = sojourn > p->target &&
 		      sojourn > p->mtu_time * bulk_flows * 2 &&
 		      sojourn > p->mtu_time * 4;
-	next_due = vars->count && ktime_to_ns(schedule) >= 0;
+	count = vars->count;
+	next_due = count && ktime_to_ns(schedule) >= 0;
 
 	vars->ecn_marked = false;
 
 	if (over_target) {
 		if (!vars->dropping) {
-			vars->dropping = true;
-			vars->drop_next = cobalt_control(now,
-							 p->interval,
-							 vars->rec_inv_sqrt);
+			WRITE_ONCE(vars->dropping, true);
+			WRITE_ONCE(vars->drop_next,
+				   cobalt_control(now,
+						  p->interval,
+						  vars->rec_inv_sqrt));
 		}
-		if (!vars->count)
-			vars->count = 1;
+		if (!count)
+			count = 1;
 	} else if (vars->dropping) {
-		vars->dropping = false;
+		WRITE_ONCE(vars->dropping, false);
 	}
 
 	if (next_due && vars->dropping) {
 		/* Use ECN mark if possible, otherwise drop */
-		if (!(vars->ecn_marked = INET_ECN_set_ce(skb)))
+		vars->ecn_marked = INET_ECN_set_ce(skb);
+		if (!vars->ecn_marked)
 			reason = QDISC_DROP_CONGESTED;
 
-		vars->count++;
-		if (!vars->count)
-			vars->count--;
-		cobalt_invsqrt(vars);
-		vars->drop_next = cobalt_control(vars->drop_next,
-						 p->interval,
-						 vars->rec_inv_sqrt);
+		count++;
+		if (!count)
+			count--;
+		cobalt_invsqrt(vars, count);
+		WRITE_ONCE(vars->drop_next,
+			   cobalt_control(vars->drop_next,
+					  p->interval,
+					  vars->rec_inv_sqrt));
 		schedule = ktime_sub(now, vars->drop_next);
 	} else {
 		while (next_due) {
-			vars->count--;
-			cobalt_invsqrt(vars);
-			vars->drop_next = cobalt_control(vars->drop_next,
-							 p->interval,
-							 vars->rec_inv_sqrt);
+			count--;
+			cobalt_invsqrt(vars, count);
+			WRITE_ONCE(vars->drop_next,
+				   cobalt_control(vars->drop_next,
+						  p->interval,
+						  vars->rec_inv_sqrt));
 			schedule = ktime_sub(now, vars->drop_next);
-			next_due = vars->count && ktime_to_ns(schedule) >= 0;
+			next_due = count && ktime_to_ns(schedule) >= 0;
 		}
 	}
 
@@ -575,11 +588,12 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars,
 	    get_random_u32() < vars->p_drop)
 		reason = QDISC_DROP_FLOOD_PROTECTION;
 
+	WRITE_ONCE(vars->count, count);
 	/* Overload the drop_next field as an activity timeout */
-	if (!vars->count)
-		vars->drop_next = ktime_add_ns(now, p->interval);
+	if (!count)
+		WRITE_ONCE(vars->drop_next, ktime_add_ns(now, p->interval));
 	else if (ktime_to_ns(schedule) > 0 && reason == QDISC_DROP_UNSPEC)
-		vars->drop_next = now;
+		WRITE_ONCE(vars->drop_next, now);
 
 	return reason;
 }
@@ -813,7 +827,7 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 		     i++, k = (k + 1) % CAKE_SET_WAYS) {
 			if (q->tags[outer_hash + k] == flow_hash) {
 				if (i)
-					q->way_hits++;
+					WRITE_ONCE(q->way_hits, q->way_hits + 1);
 
 				if (!q->flows[outer_hash + k].set) {
 					/* need to increment host refcnts */
@@ -831,7 +845,7 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 		for (i = 0; i < CAKE_SET_WAYS;
 			 i++, k = (k + 1) % CAKE_SET_WAYS) {
 			if (!q->flows[outer_hash + k].set) {
-				q->way_misses++;
+				WRITE_ONCE(q->way_misses, q->way_misses + 1);
 				allocate_src = cake_dsrc(flow_mode);
 				allocate_dst = cake_ddst(flow_mode);
 				goto found;
@@ -841,7 +855,7 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 		/* With no empty queues, default to the original
 		 * queue, accept the collision, update the host tags.
 		 */
-		q->way_collisions++;
+		WRITE_ONCE(q->way_collisions, q->way_collisions + 1);
 		allocate_src = cake_dsrc(flow_mode);
 		allocate_dst = cake_ddst(flow_mode);
 
@@ -875,7 +889,8 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 			q->flows[reduced_hash].srchost = srchost_idx;
 
 			if (q->flows[reduced_hash].set == CAKE_SET_BULK)
-				cake_inc_srchost_bulk_flow_count(q, &q->flows[reduced_hash], flow_mode);
+				cake_inc_srchost_bulk_flow_count(q, &q->flows[reduced_hash],
+								 flow_mode);
 		}
 
 		if (allocate_dst) {
@@ -899,7 +914,8 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 			q->flows[reduced_hash].dsthost = dsthost_idx;
 
 			if (q->flows[reduced_hash].set == CAKE_SET_BULK)
-				cake_inc_dsthost_bulk_flow_count(q, &q->flows[reduced_hash], flow_mode);
+				cake_inc_dsthost_bulk_flow_count(q, &q->flows[reduced_hash],
+								 flow_mode);
 		}
 	}
 
@@ -1379,9 +1395,9 @@ static u32 cake_calc_overhead(struct cake_sched_data *qd, u32 len, u32 off)
 		len -= off;
 
 	if (qd->max_netlen < len)
-		qd->max_netlen = len;
+		WRITE_ONCE(qd->max_netlen, len);
 	if (qd->min_netlen > len)
-		qd->min_netlen = len;
+		WRITE_ONCE(qd->min_netlen, len);
 
 	len += q->rate_overhead;
 
@@ -1401,9 +1417,9 @@ static u32 cake_calc_overhead(struct cake_sched_data *qd, u32 len, u32 off)
 	}
 
 	if (qd->max_adjlen < len)
-		qd->max_adjlen = len;
+		WRITE_ONCE(qd->max_adjlen, len);
 	if (qd->min_adjlen > len)
-		qd->min_adjlen = len;
+		WRITE_ONCE(qd->min_adjlen, len);
 
 	return len;
 }
@@ -1416,7 +1432,7 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb)
 	u16 segs = qdisc_pkt_segs(skb);
 	u32 len = qdisc_pkt_len(skb);
 
-	q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
+	WRITE_ONCE(q->avg_netoff, cake_ewma(q->avg_netoff, off << 16, 8));
 
 	if (segs == 1)
 		return cake_calc_overhead(q, len, off);
@@ -1590,16 +1606,17 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
 	}
 
 	if (cobalt_queue_full(&flow->cvars, &b->cparams, now))
-		b->unresponsive_flow_count++;
+		WRITE_ONCE(b->unresponsive_flow_count,
+			   b->unresponsive_flow_count + 1);
 
 	len = qdisc_pkt_len(skb);
 	q->buffer_used      -= skb->truesize;
-	b->backlogs[idx]    -= len;
-	b->tin_backlog      -= len;
+	WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] - len);
+	WRITE_ONCE(b->tin_backlog, b->tin_backlog - len);
 	qstats_backlog_sub(sch, len);
 
-	flow->dropped++;
-	b->tin_dropped++;
+	WRITE_ONCE(flow->dropped, flow->dropped + 1);
+	WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1);
 
 	if (q->config->rate_flags & CAKE_FLAG_INGRESS)
 		cake_advance_shaper(q, b, skb, now, true);
@@ -1795,7 +1812,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	}
 
 	if (unlikely(len > b->max_skblen))
-		b->max_skblen = len;
+		WRITE_ONCE(b->max_skblen, len);
 
 	if (qdisc_pkt_segs(skb) > 1 && q->config->rate_flags & CAKE_FLAG_SPLIT_GSO) {
 		struct sk_buff *segs, *nskb;
@@ -1819,13 +1836,13 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			numsegs++;
 			slen += segs->len;
 			q->buffer_used += segs->truesize;
-			b->packets++;
 		}
 
 		/* stats */
-		b->bytes	    += slen;
-		b->backlogs[idx]    += slen;
-		b->tin_backlog      += slen;
+		WRITE_ONCE(b->bytes, b->bytes + slen);
+		WRITE_ONCE(b->packets, b->packets + numsegs);
+		WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + slen);
+		WRITE_ONCE(b->tin_backlog, b->tin_backlog + slen);
 		qstats_backlog_add(sch, slen);
 		q->avg_window_bytes += slen;
 
@@ -1843,10 +1860,10 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			ack = cake_ack_filter(q, flow);
 
 		if (ack) {
-			b->ack_drops++;
+			WRITE_ONCE(b->ack_drops, b->ack_drops + 1);
 			qdisc_qstats_drop(sch);
 			ack_pkt_len = qdisc_pkt_len(ack);
-			b->bytes += ack_pkt_len;
+			WRITE_ONCE(b->bytes, b->bytes + ack_pkt_len);
 			q->buffer_used += skb->truesize - ack->truesize;
 			if (q->config->rate_flags & CAKE_FLAG_INGRESS)
 				cake_advance_shaper(q, b, ack, now, true);
@@ -1859,10 +1876,10 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		}
 
 		/* stats */
-		b->packets++;
-		b->bytes	    += len - ack_pkt_len;
-		b->backlogs[idx]    += len - ack_pkt_len;
-		b->tin_backlog      += len - ack_pkt_len;
+		WRITE_ONCE(b->packets, b->packets + 1);
+		WRITE_ONCE(b->bytes, b->bytes + len - ack_pkt_len);
+		WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + len - ack_pkt_len);
+		WRITE_ONCE(b->tin_backlog, b->tin_backlog + len - ack_pkt_len);
 		qstats_backlog_add(sch, len - ack_pkt_len);
 		q->avg_window_bytes += len - ack_pkt_len;
 	}
@@ -1894,9 +1911,9 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC;
 
 			b = div64_u64(b, window_interval);
-			q->avg_peak_bandwidth =
-				cake_ewma(q->avg_peak_bandwidth, b,
-					  b > q->avg_peak_bandwidth ? 2 : 8);
+			WRITE_ONCE(q->avg_peak_bandwidth,
+				   cake_ewma(q->avg_peak_bandwidth, b,
+					     b > q->avg_peak_bandwidth ? 2 : 8));
 			q->avg_window_bytes = 0;
 			q->avg_window_begin = now;
 
@@ -1917,27 +1934,30 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		if (!flow->set) {
 			list_add_tail(&flow->flowchain, &b->new_flows);
 		} else {
-			b->decaying_flow_count--;
+			WRITE_ONCE(b->decaying_flow_count,
+				   b->decaying_flow_count - 1);
 			list_move_tail(&flow->flowchain, &b->new_flows);
 		}
 		flow->set = CAKE_SET_SPARSE;
-		b->sparse_flow_count++;
+		WRITE_ONCE(b->sparse_flow_count,
+			   b->sparse_flow_count + 1);
 
-		flow->deficit = cake_get_flow_quantum(b, flow, q->config->flow_mode);
+		WRITE_ONCE(flow->deficit,
+			   cake_get_flow_quantum(b, flow, q->config->flow_mode));
 	} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
 		/* this flow was empty, accounted as a sparse flow, but actually
 		 * in the bulk rotation.
 		 */
 		flow->set = CAKE_SET_BULK;
-		b->sparse_flow_count--;
-		b->bulk_flow_count++;
+		WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1);
+		WRITE_ONCE(b->bulk_flow_count, b->bulk_flow_count + 1);
 
 		cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
 		cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
 	}
 
 	if (q->buffer_used > q->buffer_max_used)
-		q->buffer_max_used = q->buffer_used;
+		WRITE_ONCE(q->buffer_max_used, q->buffer_used);
 
 	if (q->buffer_used <= q->buffer_limit)
 		return NET_XMIT_SUCCESS;
@@ -1976,8 +1996,8 @@ static struct sk_buff *cake_dequeue_one(struct Qdisc *sch)
 	if (flow->head) {
 		skb = dequeue_head(flow);
 		len = qdisc_pkt_len(skb);
-		b->backlogs[q->cur_flow] -= len;
-		b->tin_backlog		 -= len;
+		WRITE_ONCE(b->backlogs[q->cur_flow], b->backlogs[q->cur_flow] - len);
+		WRITE_ONCE(b->tin_backlog, b->tin_backlog - len);
 		qstats_backlog_sub(sch, len);
 		q->buffer_used		 -= skb->truesize;
 		qdisc_qlen_dec(sch);
@@ -2042,7 +2062,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 
 		cake_configure_rates(sch, new_rate, true);
 		q->last_checked_active = now;
-		q->active_queues = num_active_qs;
+		WRITE_ONCE(q->active_queues, num_active_qs);
 	}
 
 begin:
@@ -2149,8 +2169,10 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 		 */
 		if (flow->set == CAKE_SET_SPARSE) {
 			if (flow->head) {
-				b->sparse_flow_count--;
-				b->bulk_flow_count++;
+				WRITE_ONCE(b->sparse_flow_count,
+					   b->sparse_flow_count - 1);
+				WRITE_ONCE(b->bulk_flow_count,
+					   b->bulk_flow_count + 1);
 
 				cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
 				cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
@@ -2165,7 +2187,8 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 			}
 		}
 
-		flow->deficit += cake_get_flow_quantum(b, flow, q->config->flow_mode);
+		WRITE_ONCE(flow->deficit,
+			   flow->deficit + cake_get_flow_quantum(b, flow, q->config->flow_mode));
 		list_move_tail(&flow->flowchain, &b->old_flows);
 
 		goto retry;
@@ -2177,7 +2200,8 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 		if (!skb) {
 			/* this queue was actually empty */
 			if (cobalt_queue_empty(&flow->cvars, &b->cparams, now))
-				b->unresponsive_flow_count--;
+				WRITE_ONCE(b->unresponsive_flow_count,
+					   b->unresponsive_flow_count - 1);
 
 			if (flow->cvars.p_drop || flow->cvars.count ||
 			    ktime_before(now, flow->cvars.drop_next)) {
@@ -2187,16 +2211,22 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 				list_move_tail(&flow->flowchain,
 					       &b->decaying_flows);
 				if (flow->set == CAKE_SET_BULK) {
-					b->bulk_flow_count--;
+					WRITE_ONCE(b->bulk_flow_count,
+						   b->bulk_flow_count - 1);
 
-					cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
-					cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
+					cake_dec_srchost_bulk_flow_count(b, flow,
+									 q->config->flow_mode);
+					cake_dec_dsthost_bulk_flow_count(b, flow,
+									 q->config->flow_mode);
 
-					b->decaying_flow_count++;
+					WRITE_ONCE(b->decaying_flow_count,
+						   b->decaying_flow_count + 1);
 				} else if (flow->set == CAKE_SET_SPARSE ||
 					   flow->set == CAKE_SET_SPARSE_WAIT) {
-					b->sparse_flow_count--;
-					b->decaying_flow_count++;
+					WRITE_ONCE(b->sparse_flow_count,
+						   b->sparse_flow_count - 1);
+					WRITE_ONCE(b->decaying_flow_count,
+						   b->decaying_flow_count + 1);
 				}
 				flow->set = CAKE_SET_DECAYING;
 			} else {
@@ -2204,14 +2234,20 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 				list_del_init(&flow->flowchain);
 				if (flow->set == CAKE_SET_SPARSE ||
 				    flow->set == CAKE_SET_SPARSE_WAIT)
-					b->sparse_flow_count--;
+					WRITE_ONCE(b->sparse_flow_count,
+						   b->sparse_flow_count - 1);
 				else if (flow->set == CAKE_SET_BULK) {
-					b->bulk_flow_count--;
+					WRITE_ONCE(b->bulk_flow_count,
+						   b->bulk_flow_count - 1);
 
-					cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
-					cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
-				} else
-					b->decaying_flow_count--;
+					cake_dec_srchost_bulk_flow_count(b, flow,
+									 q->config->flow_mode);
+					cake_dec_dsthost_bulk_flow_count(b, flow,
+									 q->config->flow_mode);
+				} else {
+					WRITE_ONCE(b->decaying_flow_count,
+						   b->decaying_flow_count - 1);
+				}
 
 				flow->set = CAKE_SET_NONE;
 			}
@@ -2230,11 +2266,11 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 		if (q->config->rate_flags & CAKE_FLAG_INGRESS) {
 			len = cake_advance_shaper(q, b, skb,
 						  now, true);
-			flow->deficit -= len;
+			WRITE_ONCE(flow->deficit, flow->deficit - len);
 			b->tin_deficit -= len;
 		}
-		flow->dropped++;
-		b->tin_dropped++;
+		WRITE_ONCE(flow->dropped, flow->dropped + 1);
+		WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1);
 		qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
 		qdisc_qstats_drop(sch);
 		qdisc_dequeue_drop(sch, skb, reason);
@@ -2242,20 +2278,22 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 			goto retry;
 	}
 
-	b->tin_ecn_mark += !!flow->cvars.ecn_marked;
+	WRITE_ONCE(b->tin_ecn_mark, b->tin_ecn_mark + !!flow->cvars.ecn_marked);
 	qdisc_bstats_update(sch, skb);
 	WRITE_ONCE(q->last_active, now);
 
 	/* collect delay stats */
 	delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
-	b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
-	b->peak_delay = cake_ewma(b->peak_delay, delay,
-				  delay > b->peak_delay ? 2 : 8);
-	b->base_delay = cake_ewma(b->base_delay, delay,
-				  delay < b->base_delay ? 2 : 8);
+	WRITE_ONCE(b->avge_delay, cake_ewma(b->avge_delay, delay, 8));
+	WRITE_ONCE(b->peak_delay,
+		   cake_ewma(b->peak_delay, delay,
+			     delay > b->peak_delay ? 2 : 8));
+	WRITE_ONCE(b->base_delay,
+		   cake_ewma(b->base_delay, delay,
+			     delay < b->base_delay ? 2 : 8));
 
 	len = cake_advance_shaper(q, b, skb, now, false);
-	flow->deficit -= len;
+	WRITE_ONCE(flow->deficit, flow->deficit - len);
 	b->tin_deficit -= len;
 
 	if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
@@ -2329,9 +2367,8 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
 	u8  rate_shft = 0;
 	u64 rate_ns = 0;
 
-	b->flow_quantum = 1514;
 	if (rate) {
-		b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL);
+		WRITE_ONCE(b->flow_quantum, max(min(rate >> 12, 1514ULL), 300ULL));
 		rate_shft = 34;
 		rate_ns = ((u64)NSEC_PER_SEC) << rate_shft;
 		rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate));
@@ -2339,9 +2376,11 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
 			rate_ns >>= 1;
 			rate_shft--;
 		}
-	} /* else unlimited, ie. zero delay */
-
-	b->tin_rate_bps  = rate;
+	} else {
+		/* else unlimited, ie. zero delay */
+		WRITE_ONCE(b->flow_quantum, 1514);
+	}
+	WRITE_ONCE(b->tin_rate_bps, rate);
 	b->tin_rate_ns   = rate_ns;
 	b->tin_rate_shft = rate_shft;
 
@@ -2350,10 +2389,11 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
 
 	byte_target_ns = (byte_target * rate_ns) >> rate_shft;
 
-	b->cparams.target = max((byte_target_ns * 3) / 2, target_ns);
-	b->cparams.interval = max(rtt_est_ns +
-				     b->cparams.target - target_ns,
-				     b->cparams.target * 2);
+	WRITE_ONCE(b->cparams.target,
+		   max((byte_target_ns * 3) / 2, target_ns));
+	WRITE_ONCE(b->cparams.interval,
+		   max(rtt_est_ns + b->cparams.target - target_ns,
+		       b->cparams.target * 2));
 	b->cparams.mtu_time = byte_target_ns;
 	b->cparams.p_inc = 1 << 24; /* 1/256 */
 	b->cparams.p_dec = 1 << 20; /* 1/4096 */
@@ -2611,25 +2651,27 @@ static void cake_reconfigure(struct Qdisc *sch)
 {
 	struct cake_sched_data *qd = qdisc_priv(sch);
 	struct cake_sched_config *q = qd->config;
+	u32 buffer_limit;
 
 	cake_configure_rates(sch, qd->config->rate_bps, false);
 
 	if (q->buffer_config_limit) {
-		qd->buffer_limit = q->buffer_config_limit;
+		buffer_limit = q->buffer_config_limit;
 	} else if (q->rate_bps) {
 		u64 t = q->rate_bps * q->interval;
 
 		do_div(t, USEC_PER_SEC / 4);
-		qd->buffer_limit = max_t(u32, t, 4U << 20);
+		buffer_limit = max_t(u32, t, 4U << 20);
 	} else {
-		qd->buffer_limit = ~0;
+		buffer_limit = ~0;
 	}
 
 	sch->flags &= ~TCQ_F_CAN_BYPASS;
 
-	qd->buffer_limit = min(qd->buffer_limit,
-			       max(sch->limit * psched_mtu(qdisc_dev(sch)),
-				   q->buffer_config_limit));
+	WRITE_ONCE(qd->buffer_limit,
+		   min(buffer_limit,
+		       max(sch->limit * psched_mtu(qdisc_dev(sch)),
+			   q->buffer_config_limit)));
 }
 
 static int cake_config_change(struct cake_sched_config *q, struct nlattr *opt,
@@ -2774,10 +2816,10 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 		return ret;
 
 	if (overhead_changed) {
-		qd->max_netlen = 0;
-		qd->max_adjlen = 0;
-		qd->min_netlen = ~0;
-		qd->min_adjlen = ~0;
+		WRITE_ONCE(qd->max_netlen, 0);
+		WRITE_ONCE(qd->max_adjlen, 0);
+		WRITE_ONCE(qd->min_netlen, ~0);
+		WRITE_ONCE(qd->min_adjlen, ~0);
 	}
 
 	if (qd->tins) {
@@ -2995,15 +3037,15 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 			goto nla_put_failure;			       \
 	} while (0)
 
-	PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth);
-	PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit);
-	PUT_STAT_U32(MEMORY_USED, q->buffer_max_used);
-	PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16));
-	PUT_STAT_U32(MAX_NETLEN, q->max_netlen);
-	PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen);
-	PUT_STAT_U32(MIN_NETLEN, q->min_netlen);
-	PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen);
-	PUT_STAT_U32(ACTIVE_QUEUES, q->active_queues);
+	PUT_STAT_U64(CAPACITY_ESTIMATE64, READ_ONCE(q->avg_peak_bandwidth));
+	PUT_STAT_U32(MEMORY_LIMIT, READ_ONCE(q->buffer_limit));
+	PUT_STAT_U32(MEMORY_USED, READ_ONCE(q->buffer_max_used));
+	PUT_STAT_U32(AVG_NETOFF, ((READ_ONCE(q->avg_netoff) + 0x8000) >> 16));
+	PUT_STAT_U32(MAX_NETLEN, READ_ONCE(q->max_netlen));
+	PUT_STAT_U32(MAX_ADJLEN, READ_ONCE(q->max_adjlen));
+	PUT_STAT_U32(MIN_NETLEN, READ_ONCE(q->min_netlen));
+	PUT_STAT_U32(MIN_ADJLEN, READ_ONCE(q->min_adjlen));
+	PUT_STAT_U32(ACTIVE_QUEUES, READ_ONCE(q->active_queues));
 
 #undef PUT_STAT_U32
 #undef PUT_STAT_U64
@@ -3029,38 +3071,38 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 		if (!ts)
 			goto nla_put_failure;
 
-		PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps);
-		PUT_TSTAT_U64(SENT_BYTES64, b->bytes);
-		PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog);
+		PUT_TSTAT_U64(THRESHOLD_RATE64, READ_ONCE(b->tin_rate_bps));
+		PUT_TSTAT_U64(SENT_BYTES64, READ_ONCE(b->bytes));
+		PUT_TSTAT_U32(BACKLOG_BYTES, READ_ONCE(b->tin_backlog));
 
 		PUT_TSTAT_U32(TARGET_US,
-			      ktime_to_us(ns_to_ktime(b->cparams.target)));
+			ktime_to_us(ns_to_ktime(READ_ONCE(b->cparams.target))));
 		PUT_TSTAT_U32(INTERVAL_US,
-			      ktime_to_us(ns_to_ktime(b->cparams.interval)));
+			ktime_to_us(ns_to_ktime(READ_ONCE(b->cparams.interval))));
 
-		PUT_TSTAT_U32(SENT_PACKETS, b->packets);
-		PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped);
-		PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark);
-		PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops);
+		PUT_TSTAT_U32(SENT_PACKETS, READ_ONCE(b->packets));
+		PUT_TSTAT_U32(DROPPED_PACKETS, READ_ONCE(b->tin_dropped));
+		PUT_TSTAT_U32(ECN_MARKED_PACKETS, READ_ONCE(b->tin_ecn_mark));
+		PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, READ_ONCE(b->ack_drops));
 
 		PUT_TSTAT_U32(PEAK_DELAY_US,
-			      ktime_to_us(ns_to_ktime(b->peak_delay)));
+			      ktime_to_us(ns_to_ktime(READ_ONCE(b->peak_delay))));
 		PUT_TSTAT_U32(AVG_DELAY_US,
-			      ktime_to_us(ns_to_ktime(b->avge_delay)));
+			      ktime_to_us(ns_to_ktime(READ_ONCE(b->avge_delay))));
 		PUT_TSTAT_U32(BASE_DELAY_US,
-			      ktime_to_us(ns_to_ktime(b->base_delay)));
+			      ktime_to_us(ns_to_ktime(READ_ONCE(b->base_delay))));
 
-		PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits);
-		PUT_TSTAT_U32(WAY_MISSES, b->way_misses);
-		PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions);
+		PUT_TSTAT_U32(WAY_INDIRECT_HITS, READ_ONCE(b->way_hits));
+		PUT_TSTAT_U32(WAY_MISSES, READ_ONCE(b->way_misses));
+		PUT_TSTAT_U32(WAY_COLLISIONS, READ_ONCE(b->way_collisions));
 
-		PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count +
-					    b->decaying_flow_count);
-		PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count);
-		PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count);
-		PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen);
+		PUT_TSTAT_U32(SPARSE_FLOWS, READ_ONCE(b->sparse_flow_count) +
+					    READ_ONCE(b->decaying_flow_count));
+		PUT_TSTAT_U32(BULK_FLOWS, READ_ONCE(b->bulk_flow_count));
+		PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, READ_ONCE(b->unresponsive_flow_count));
+		PUT_TSTAT_U32(MAX_SKBLEN, READ_ONCE(b->max_skblen));
 
-		PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum);
+		PUT_TSTAT_U32(FLOW_QUANTUM, READ_ONCE(b->flow_quantum));
 		nla_nest_end(d->skb, ts);
 	}
 
@@ -3128,7 +3170,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 
 		flow = &b->flows[idx % CAKE_QUEUES];
 
-		if (flow->head) {
+		if (READ_ONCE(flow->head)) {
 			sch_tree_lock(sch);
 			skb = flow->head;
 			while (skb) {
@@ -3137,13 +3179,15 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 			}
 			sch_tree_unlock(sch);
 		}
-		qs.backlog = b->backlogs[idx % CAKE_QUEUES];
-		qs.drops = flow->dropped;
+		qs.backlog = READ_ONCE(b->backlogs[idx % CAKE_QUEUES]);
+		qs.drops = READ_ONCE(flow->dropped);
 	}
 	if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
 		return -1;
 	if (flow) {
 		ktime_t now = ktime_get();
+		bool dropping;
+		u32 p_drop;
 
 		stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP);
 		if (!stats)
@@ -3158,21 +3202,23 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 			goto nla_put_failure;			       \
 	} while (0)
 
-		PUT_STAT_S32(DEFICIT, flow->deficit);
-		PUT_STAT_U32(DROPPING, flow->cvars.dropping);
-		PUT_STAT_U32(COBALT_COUNT, flow->cvars.count);
-		PUT_STAT_U32(P_DROP, flow->cvars.p_drop);
-		if (flow->cvars.p_drop) {
+		PUT_STAT_S32(DEFICIT, READ_ONCE(flow->deficit));
+		dropping = READ_ONCE(flow->cvars.dropping);
+		PUT_STAT_U32(DROPPING, dropping);
+		PUT_STAT_U32(COBALT_COUNT, READ_ONCE(flow->cvars.count));
+		p_drop = READ_ONCE(flow->cvars.p_drop);
+		PUT_STAT_U32(P_DROP, p_drop);
+		if (p_drop) {
 			PUT_STAT_S32(BLUE_TIMER_US,
 				     ktime_to_us(
 					     ktime_sub(now,
-						       flow->cvars.blue_timer)));
+						       READ_ONCE(flow->cvars.blue_timer))));
 		}
-		if (flow->cvars.dropping) {
+		if (dropping) {
 			PUT_STAT_S32(DROP_NEXT_US,
 				     ktime_to_us(
 					     ktime_sub(now,
-						       flow->cvars.drop_next)));
+						       READ_ONCE(flow->cvars.drop_next))));
 		}
 
 		if (nla_nest_end(d->skb, stats) < 0)
@@ -3298,10 +3344,10 @@ static int cake_mq_change(struct Qdisc *sch, struct nlattr *opt,
 		struct cake_sched_data *qd = qdisc_priv(chld);
 
 		if (overhead_changed) {
-			qd->max_netlen = 0;
-			qd->max_adjlen = 0;
-			qd->min_netlen = ~0;
-			qd->min_adjlen = ~0;
+			WRITE_ONCE(qd->max_netlen, 0);
+			WRITE_ONCE(qd->max_adjlen, 0);
+			WRITE_ONCE(qd->min_netlen, ~0);
+			WRITE_ONCE(qd->min_adjlen, ~0);
 		}
 
 		if (qd->tins) {
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* [PATCH v3 net-next 10/15] net/sched: sch_fq_pie: annotate data-races in fq_pie_dump_stats()
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

fq_pie_dump_stats() acquires the qdisc spinlock a bit too late.

Move this acquisition before we fill tc_fq_pie_xstats with live data.

An alternative would be to add READ_ONCE() and WRITE_ONCE() annotations,
but the spinlock is needed anyway.

Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_fq_pie.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 197f0df0a6eb06ab4ce25eefe01d32a35dbd84af..72f48fa4010bebbe6be212938b457db21ff3c5a0 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -509,18 +509,19 @@ static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
 static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
 	struct fq_pie_sched_data *q = qdisc_priv(sch);
-	struct tc_fq_pie_xstats st = {
-		.packets_in	= q->stats.packets_in,
-		.overlimit	= q->stats.overlimit,
-		.overmemory	= q->overmemory,
-		.dropped	= q->stats.dropped,
-		.ecn_mark	= q->stats.ecn_mark,
-		.new_flow_count = q->new_flow_count,
-		.memory_usage   = q->memory_usage,
-	};
+	struct tc_fq_pie_xstats st = { 0 };
 	struct list_head *pos;
 
 	sch_tree_lock(sch);
+
+	st.packets_in	= q->stats.packets_in;
+	st.overlimit	= q->stats.overlimit;
+	st.overmemory	= q->overmemory;
+	st.dropped	= q->stats.dropped;
+	st.ecn_mark	= q->stats.ecn_mark;
+	st.new_flow_count = q->new_flow_count;
+	st.memory_usage   = q->memory_usage;
+
 	list_for_each(pos, &q->new_flows)
 		st.new_flows_len++;
 
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* [PATCH v3 net-next 01/15] net/sched: rename qstats_overlimit_inc() to qstats_cpu_overlimit_inc()
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

qstats_overlimit_inc() is only used to increment per cpu overlimits.

It can use this_cpu_inc() to avoid this_cpu_ptr() extra cost
and avoid potential store tearing.

Change qstats_overlimit_inc() name and its argument type.

Also add a WRITE_ONCE() in qdisc_qstats_overlimit() to prevent
store tearing.

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux.1
add/remove: 0/0 grow/shrink: 0/5 up/down: 0/-72 (-72)
Function                                     old     new   delta
tcf_skbmod_act                               772     764      -8
tcf_police_act                               733     725      -8
tcf_mirred_to_dev                           1126    1114     -12
tcf_ife_act                                 1077    1061     -16
tcf_mirred_act                              1324    1296     -28
Total: Before=29610901, After=29610829, chg -0.00%

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/act_api.h     | 2 +-
 include/net/sch_generic.h | 6 +++---
 net/sched/act_ife.c       | 4 ++--
 net/sched/act_police.c    | 2 +-
 net/sched/act_skbmod.c    | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index d11b791079302f50c47e174979767e0b24afc59a..2ec4ef9a5d0c8e9110f92f135cc3c31a38af0479 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -250,7 +250,7 @@ static inline void tcf_action_inc_drop_qstats(struct tc_action *a)
 static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a)
 {
 	if (likely(a->cpu_qstats)) {
-		qstats_overlimit_inc(this_cpu_ptr(a->cpu_qstats));
+		qstats_cpu_overlimit_inc(a->cpu_qstats);
 		return;
 	}
 	atomic_inc(&a->tcfa_overlimits);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 5af262ec4bbd2d5021904df127a849e52c26178a..3ee383c6fc3f66f1aecd9ebc675fbd143852c150 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1004,9 +1004,9 @@ static inline void qstats_drop_inc(struct gnet_stats_queue *qstats)
 	qstats->drops++;
 }
 
-static inline void qstats_overlimit_inc(struct gnet_stats_queue *qstats)
+static inline void qstats_cpu_overlimit_inc(struct gnet_stats_queue __percpu *qstats)
 {
-	qstats->overlimits++;
+	this_cpu_inc(qstats->overlimits);
 }
 
 static inline void qdisc_qstats_drop(struct Qdisc *sch)
@@ -1021,7 +1021,7 @@ static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch)
 
 static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
 {
-	sch->qstats.overlimits++;
+	WRITE_ONCE(sch->qstats.overlimits, sch->qstats.overlimits + 1);
 }
 
 static inline int qdisc_qstats_copy(struct gnet_dump *d, struct Qdisc *sch)
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index d5e8a91bb4eb9f1f1f084e199b5ada4e7f7e7205..e1b825e14900d6f46bbfd1b7f72ab6cd554d8a73 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -750,7 +750,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
 			 */
 			pr_info_ratelimited("Unknown metaid %d dlen %d\n",
 					    mtype, dlen);
-			qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
+			qstats_cpu_overlimit_inc(ife->common.cpu_qstats);
 		}
 	}
 
@@ -814,7 +814,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
 		/* abuse overlimits to count when we allow packet
 		 * with no metadata
 		 */
-		qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
+		qstats_cpu_overlimit_inc(ife->common.cpu_qstats);
 		return action;
 	}
 	/* could be stupid policy setup or mtu config
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 12ea9e5a600536b603ea73cc99b4c00381287219..8060f43e4d11c0a26e1475db06b76426f50c5975 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -307,7 +307,7 @@ TC_INDIRECT_SCOPE int tcf_police_act(struct sk_buff *skb,
 	}
 
 inc_overlimits:
-	qstats_overlimit_inc(this_cpu_ptr(police->common.cpu_qstats));
+	qstats_cpu_overlimit_inc(police->common.cpu_qstats);
 inc_drops:
 	if (ret == TC_ACT_SHOT)
 		qstats_drop_inc(this_cpu_ptr(police->common.cpu_qstats));
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 23ca46138f040d38de37684439873921bc9c86af..a464b0a3c1b81dba6c28c1141aa38c5c7cad3acb 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -87,7 +87,7 @@ TC_INDIRECT_SCOPE int tcf_skbmod_act(struct sk_buff *skb,
 	return p->action;
 
 drop:
-	qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+	qstats_cpu_overlimit_inc(d->common.cpu_qstats);
 	return TC_ACT_SHOT;
 }
 
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* [PATCH v3 net-next 09/15] net/sched: sch_pie: annotate data-races in pie_dump_stats()
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

pie_dump_stats() only runs with RTNL held,
reading fields that can be changed in qdisc fast path.

Add READ_ONCE()/WRITE_ONCE() annotations.

Alternative would be to acquire the qdisc spinlock, but our long-term
goal is to make qdisc dump operations lockless as much as we can.

tc_pie_xstats fields don't need to be latched atomically,
otherwise this bug would have been caught earlier.

Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/pie.h   |  2 +-
 net/sched/sch_pie.c | 38 +++++++++++++++++++-------------------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/include/net/pie.h b/include/net/pie.h
index 01cbc66825a40bd21c0a044b1180cbbc346785df..1f3db0c355149b41823a891c9156cac625122031 100644
--- a/include/net/pie.h
+++ b/include/net/pie.h
@@ -104,7 +104,7 @@ static inline void pie_vars_init(struct pie_vars *vars)
 	vars->dq_tstamp = DTIME_INVALID;
 	vars->accu_prob = 0;
 	vars->dq_count = DQCOUNT_INVALID;
-	vars->avg_dq_rate = 0;
+	WRITE_ONCE(vars->avg_dq_rate, 0);
 }
 
 static inline struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb)
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 16f3f629cb8e4be71431f7e50a278e3c7fdba8d0..fb53fbf0e328571be72b66ba4e75a938e1963422 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -90,7 +90,7 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	bool enqueue = false;
 
 	if (unlikely(qdisc_qlen(sch) >= sch->limit)) {
-		q->stats.overlimit++;
+		WRITE_ONCE(q->stats.overlimit, q->stats.overlimit + 1);
 		goto out;
 	}
 
@@ -104,7 +104,7 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		/* If packet is ecn capable, mark it if drop probability
 		 * is lower than 10%, else drop it.
 		 */
-		q->stats.ecn_mark++;
+		WRITE_ONCE(q->stats.ecn_mark, q->stats.ecn_mark + 1);
 		enqueue = true;
 	}
 
@@ -114,15 +114,15 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		if (!q->params.dq_rate_estimator)
 			pie_set_enqueue_time(skb);
 
-		q->stats.packets_in++;
+		WRITE_ONCE(q->stats.packets_in, q->stats.packets_in + 1);
 		if (qdisc_qlen(sch) > q->stats.maxq)
-			q->stats.maxq = qdisc_qlen(sch);
+			WRITE_ONCE(q->stats.maxq, qdisc_qlen(sch));
 
 		return qdisc_enqueue_tail(skb, sch);
 	}
 
 out:
-	q->stats.dropped++;
+	WRITE_ONCE(q->stats.dropped, q->stats.dropped + 1);
 	q->vars.accu_prob = 0;
 	return qdisc_drop_reason(skb, sch, to_free, reason);
 }
@@ -267,11 +267,11 @@ void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params,
 			count = count / dtime;
 
 			if (vars->avg_dq_rate == 0)
-				vars->avg_dq_rate = count;
+				WRITE_ONCE(vars->avg_dq_rate, count);
 			else
-				vars->avg_dq_rate =
+				WRITE_ONCE(vars->avg_dq_rate,
 				    (vars->avg_dq_rate -
-				     (vars->avg_dq_rate >> 3)) + (count >> 3);
+				     (vars->avg_dq_rate >> 3)) + (count >> 3));
 
 			/* If the queue has receded below the threshold, we hold
 			 * on to the last drain rate calculated, else we reset
@@ -381,7 +381,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
 	if (delta > 0) {
 		/* prevent overflow */
 		if (vars->prob < oldprob) {
-			vars->prob = MAX_PROB;
+			WRITE_ONCE(vars->prob, MAX_PROB);
 			/* Prevent normalization error. If probability is at
 			 * maximum value already, we normalize it here, and
 			 * skip the check to do a non-linear drop in the next
@@ -392,7 +392,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
 	} else {
 		/* prevent underflow */
 		if (vars->prob > oldprob)
-			vars->prob = 0;
+			WRITE_ONCE(vars->prob, 0);
 	}
 
 	/* Non-linear drop in probability: Reduce drop probability quickly if
@@ -403,7 +403,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
 		/* Reduce drop probability to 98.4% */
 		vars->prob -= vars->prob / 64;
 
-	vars->qdelay = qdelay;
+	WRITE_ONCE(vars->qdelay, qdelay);
 	vars->backlog_old = backlog;
 
 	/* We restart the measurement cycle if the following conditions are met
@@ -502,21 +502,21 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 	struct pie_sched_data *q = qdisc_priv(sch);
 	struct tc_pie_xstats st = {
 		.prob		= q->vars.prob << BITS_PER_BYTE,
-		.delay		= ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) /
+		.delay		= ((u32)PSCHED_TICKS2NS(READ_ONCE(q->vars.qdelay))) /
 				   NSEC_PER_USEC,
-		.packets_in	= q->stats.packets_in,
-		.overlimit	= q->stats.overlimit,
-		.maxq		= q->stats.maxq,
-		.dropped	= q->stats.dropped,
-		.ecn_mark	= q->stats.ecn_mark,
+		.packets_in	= READ_ONCE(q->stats.packets_in),
+		.overlimit	= READ_ONCE(q->stats.overlimit),
+		.maxq		= READ_ONCE(q->stats.maxq),
+		.dropped	= READ_ONCE(q->stats.dropped),
+		.ecn_mark	= READ_ONCE(q->stats.ecn_mark),
 	};
 
 	/* avg_dq_rate is only valid if dq_rate_estimator is enabled */
 	st.dq_rate_estimating = q->params.dq_rate_estimator;
 
 	/* unscale and return dq_rate in bytes per sec */
-	if (q->params.dq_rate_estimator)
-		st.avg_dq_rate = q->vars.avg_dq_rate *
+	if (st.dq_rate_estimating)
+		st.avg_dq_rate = READ_ONCE(q->vars.avg_dq_rate) *
 				 (PSCHED_TICKS_PER_SEC) >> PIE_SCALE;
 
 	return gnet_stats_copy_app(d, &st, sizeof(st));
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* [PATCH v3 net-next 03/15] net/sched: add READ_ONCE() in gnet_stats_add_queue[_cpu]
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

Stats are read locklessly; add READ_ONCE() to prevent load-tearing.

Write side will be handled in separate patches.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/core/gen_stats.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index b71ccaec0991461333dbe465ee619bca4a06e75b..1a2380e74272de8eaf3d4ef453e56105a31e9edf 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -345,11 +345,11 @@ static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats,
 	for_each_possible_cpu(i) {
 		const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
 
-		qstats->qlen += qcpu->qlen;
-		qstats->backlog += qcpu->backlog;
-		qstats->drops += qcpu->drops;
-		qstats->requeues += qcpu->requeues;
-		qstats->overlimits += qcpu->overlimits;
+		qstats->qlen += READ_ONCE(qcpu->qlen);
+		qstats->backlog += READ_ONCE(qcpu->backlog);
+		qstats->drops += READ_ONCE(qcpu->drops);
+		qstats->requeues += READ_ONCE(qcpu->requeues);
+		qstats->overlimits += READ_ONCE(qcpu->overlimits);
 	}
 }
 
@@ -360,11 +360,11 @@ void gnet_stats_add_queue(struct gnet_stats_queue *qstats,
 	if (cpu) {
 		gnet_stats_add_queue_cpu(qstats, cpu);
 	} else {
-		qstats->qlen += q->qlen;
-		qstats->backlog += q->backlog;
-		qstats->drops += q->drops;
-		qstats->requeues += q->requeues;
-		qstats->overlimits += q->overlimits;
+		qstats->qlen += READ_ONCE(q->qlen);
+		qstats->backlog += READ_ONCE(q->backlog);
+		qstats->drops += READ_ONCE(q->drops);
+		qstats->requeues += READ_ONCE(q->requeues);
+		qstats->overlimits += READ_ONCE(q->overlimits);
 	}
 }
 EXPORT_SYMBOL(gnet_stats_add_queue);
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* Re: [PATCH net] netrom: do some basic forms of validation on incoming frames
From: Dan Cross @ 2026-04-10 18:23 UTC (permalink / raw)
  To: jj
  Cc: Simon Horman, Greg Kroah-Hartman, Jakub Kicinski, netdev,
	linux-kernel, David S. Miller, Eric Dumazet, Paolo Abeni,
	linux-hams, Yizhe Zhuang, stable
In-Reply-To: <18e3df62-34f9-4de0-903b-19919d7ae2ca@eastlink.ca>

On Fri, Apr 10, 2026 at 11:49 AM jj <ve1jot@eastlink.ca> wrote:
> This is NOT an obsolete protocol..this is in use by amateur radio
> operators world-wide...we use it for RF comms usually, because what
> happens if the internet goes "down", we can still provide comms over
> slower RF links....(plus it's a fun mode)please PLEASE do not drop...and
> sorry for the noise...

There are at least three separable issues being conflated here.

One is whether amateur radio operators are using AX.25, NET/ROM, and
ROSE.  They are; that's indisputable.

Another is whether those operators are using the implementation in the
Linux kernel.  Some are (myself included), though many fewer than are
using the protocols generally.

The third is whether preserving the implementation of these in the
kernel is the best mechanism for using those protocols on Linux-based
systems.  For that, I would argue that no, it is not.

Taking just AX.25, the current implementation has known deficiencies:
it is buggy, implements an older version of the protocol, and at best
receives nominal maintenance: notably, the newer networking tools
(`ip`, `ss`, etc) meant as replacements for `netstat`, `route`, and
`ifconfig` have not been updated to incorporate information about the
amateur radio protocols, and recent changes have left them broken for
long stretches of time.  More details are available online, such as at
https://blog.habets.se/2021/11/AX25-user-space.html

There is very little to recommend the kernel implementations, and any
unique functionality they once provided, such as IP over AX.25, can be
done via other means in userspace; e.g., one can use TAP/TUN for IP
over AX.25.

Therefore, it would be better to remove these from the kernel, and
implement them in userspace instead, or use an existing userspace
implementation (e.g., LinBPQ or similar).  Backwards compatibility
with existing Linux applications that expect to use the sockets API
with amateur radio could `LD_PRELOAD` a shim compatibility library
that simulates the current programming interface.  There is simply no
reason to preserve these in the kernel, and bluntly, the
implementation is pure drag at this point.

Note that this doesn't preclude anyone from using AX.25 et al on
Linux, or force dependency on the Internet: it just moves the
implementation of those protocols out of the kernel and into a normal
userspace program, which is arguably easier to maintain and iterate on
for the ham community, anyway.

        - Dan C.
          (KZ2X)

> On 2026-04-10 07:28, Simon Horman wrote:
> > On Fri, Apr 10, 2026 at 07:24:36AM +0200, Greg Kroah-Hartman wrote:
> >> On Thu, Apr 09, 2026 at 08:32:35PM -0700, Jakub Kicinski wrote:
> >>> On Thu, 9 Apr 2026 20:03:28 +0100 Simon Horman wrote:
> >>>> I expect that checking skb->len isn't sufficient here
> >>>> and pskb_may_pull needs to be used to ensure that
> >>>> the data is also available in the linear section of the skb.
> >>> Or for simplicity we could also be testing against skb_headlen()
> >>> since we don't expect any legit non-linear frames here? Dunno.
> > Sure, that's fine by me if it leads to simpler code than
> > using pskb_may_pull(). Else I'd lean towards pskb_may_pull()
> > as it is a more general approach that feels worth proliferating.
> >
> >> I'll be glad to change this either way, your call.  Given that this is
> >> an obsolete protocol that seems to only be a target for drive-by fuzzers
> >> to attack, whatever the simplest thing to do to quiet them up I'll be
> >> glad to implement.
> >>
> >> Or can we just delete this stuff entirely?  :)
> > Deleting sounds good to me.
> > But we likely need a deprecation process.
> > In which case fixing these bugs still makes sense for the short term.
> >
>

^ permalink raw reply

* [PATCH v3 net-next 14/15] net/sched: mq: no longer acquire qdisc spinlocks in dump operations
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

Prepare mq_dump_common(), mqprio_dump() and mqprio_dump_class_stats()
for RTNL avoidance.

Use private variables instead of assuming sch->bstats and sch->qstats
can be used when folding stats from children.

This means the children qdisc spinlocks no longer need to be acquired.

Add qdisc_qlen_lockless() helper, and change gnet_stats_add_basic()
prototype.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/gen_stats.h   |  9 +++--
 include/net/sch_generic.h | 14 ++++++++
 net/core/gen_estimator.c  | 24 ++++++-------
 net/core/gen_stats.c      | 17 +++++-----
 net/sched/sch_mq.c        | 33 +++++++++++-------
 net/sched/sch_mqprio.c    | 71 +++++++++++++++++++--------------------
 6 files changed, 95 insertions(+), 73 deletions(-)

diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 7aa2b8e1fb298c4f994a745b114fc4da785ddf4b..5484b67298e3fe94fe84f0e929799362d21499df 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -21,6 +21,11 @@ struct gnet_stats_basic_sync {
 	struct u64_stats_sync syncp;
 } __aligned(2 * sizeof(u64));
 
+struct gnet_stats {
+	u64	bytes;
+	u64	packets;
+};
+
 struct net_rate_estimator;
 
 struct gnet_dump {
@@ -49,9 +54,9 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
 int gnet_stats_copy_basic(struct gnet_dump *d,
 			  struct gnet_stats_basic_sync __percpu *cpu,
 			  struct gnet_stats_basic_sync *b, bool running);
-void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+void gnet_stats_add_basic(struct gnet_stats *bstats,
 			  struct gnet_stats_basic_sync __percpu *cpu,
-			  struct gnet_stats_basic_sync *b, bool running);
+			  const struct gnet_stats_basic_sync *b, bool running);
 int gnet_stats_copy_basic_hw(struct gnet_dump *d,
 			     struct gnet_stats_basic_sync __percpu *cpu,
 			     struct gnet_stats_basic_sync *b, bool running);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b0564a39caf4471619b74179a06a0e41e3765d94..92683be33527bb0a5147d095ba08f5f8494933dd 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -542,6 +542,11 @@ static inline int qdisc_qlen(const struct Qdisc *q)
 	return q->q.qlen;
 }
 
+static inline int qdisc_qlen_lockless(const struct Qdisc *q)
+{
+	return READ_ONCE(q->q.qlen);
+}
+
 static inline void qdisc_qlen_inc(struct Qdisc *q)
 {
 	WRITE_ONCE(q->q.qlen, q->q.qlen + 1);
@@ -947,6 +952,15 @@ static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
 	u64_stats_update_end(&bstats->syncp);
 }
 
+static inline void _bstats_set(struct gnet_stats_basic_sync *bstats,
+			       u64 bytes, u64 packets)
+{
+	u64_stats_update_begin(&bstats->syncp);
+	u64_stats_set(&bstats->bytes, bytes);
+	u64_stats_set(&bstats->packets, packets);
+	u64_stats_update_end(&bstats->syncp);
+}
+
 static inline void bstats_update(struct gnet_stats_basic_sync *bstats,
 				 const struct sk_buff *skb)
 {
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index c34e58c6c3e666743e72978f9a78cf7f95a360c3..40990aee45590f2c56c070b0d28f856fc82d1f55 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -60,9 +60,10 @@ struct net_rate_estimator {
 };
 
 static void est_fetch_counters(struct net_rate_estimator *e,
-			       struct gnet_stats_basic_sync *b)
+			       struct gnet_stats *b)
 {
-	gnet_stats_basic_sync_init(b);
+	b->packets = 0;
+	b->bytes = 0;
 	if (e->stats_lock)
 		spin_lock(e->stats_lock);
 
@@ -76,18 +77,15 @@ static void est_fetch_counters(struct net_rate_estimator *e,
 static void est_timer(struct timer_list *t)
 {
 	struct net_rate_estimator *est = timer_container_of(est, t, timer);
-	struct gnet_stats_basic_sync b;
-	u64 b_bytes, b_packets;
+	struct gnet_stats b;
 	u64 rate, brate;
 
 	est_fetch_counters(est, &b);
-	b_bytes = u64_stats_read(&b.bytes);
-	b_packets = u64_stats_read(&b.packets);
 
-	brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log);
+	brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
 	brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
 
-	rate = (b_packets - est->last_packets) << (10 - est->intvl_log);
+	rate = (b.packets - est->last_packets) << (10 - est->intvl_log);
 	rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
 
 	preempt_disable_nested();
@@ -97,8 +95,8 @@ static void est_timer(struct timer_list *t)
 	write_seqcount_end(&est->seq);
 	preempt_enable_nested();
 
-	est->last_bytes = b_bytes;
-	est->last_packets = b_packets;
+	est->last_bytes = b.bytes;
+	est->last_packets = b.packets;
 
 	est->next_jiffies += ((HZ/4) << est->intvl_log);
 
@@ -138,7 +136,7 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
 {
 	struct gnet_estimator *parm = nla_data(opt);
 	struct net_rate_estimator *old, *est;
-	struct gnet_stats_basic_sync b;
+	struct gnet_stats b;
 	int intvl_log;
 
 	if (nla_len(opt) < sizeof(*parm))
@@ -172,8 +170,8 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
 	est_fetch_counters(est, &b);
 	if (lock)
 		local_bh_enable();
-	est->last_bytes = u64_stats_read(&b.bytes);
-	est->last_packets = u64_stats_read(&b.packets);
+	est->last_bytes = b.bytes;
+	est->last_packets = b.packets;
 
 	if (lock)
 		spin_lock_bh(lock);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 1a2380e74272de8eaf3d4ef453e56105a31e9edf..14ee7a4e3709ad5c64a158d3c8d1177ada3a32b0 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -123,10 +123,9 @@ void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b)
 }
 EXPORT_SYMBOL(gnet_stats_basic_sync_init);
 
-static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats,
+static void gnet_stats_add_basic_cpu(struct gnet_stats *bstats,
 				     struct gnet_stats_basic_sync __percpu *cpu)
 {
-	u64 t_bytes = 0, t_packets = 0;
 	int i;
 
 	for_each_possible_cpu(i) {
@@ -140,19 +139,18 @@ static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats,
 			packets = u64_stats_read(&bcpu->packets);
 		} while (u64_stats_fetch_retry(&bcpu->syncp, start));
 
-		t_bytes += bytes;
-		t_packets += packets;
+		bstats->bytes += bytes;
+		bstats->packets += packets;
 	}
-	_bstats_update(bstats, t_bytes, t_packets);
 }
 
-void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+void gnet_stats_add_basic(struct gnet_stats *bstats,
 			  struct gnet_stats_basic_sync __percpu *cpu,
-			  struct gnet_stats_basic_sync *b, bool running)
+			  const struct gnet_stats_basic_sync *b, bool running)
 {
 	unsigned int start;
-	u64 bytes = 0;
 	u64 packets = 0;
+	u64 bytes = 0;
 
 	WARN_ON_ONCE((cpu || running) && in_hardirq());
 
@@ -167,7 +165,8 @@ void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
 		packets = u64_stats_read(&b->packets);
 	} while (running && u64_stats_fetch_retry(&b->syncp, start));
 
-	_bstats_update(bstats, bytes, packets);
+	bstats->bytes += bytes;
+	bstats->packets += packets;
 }
 EXPORT_SYMBOL(gnet_stats_add_basic);
 
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index ec8c91d3fde04e59daec2aecdb14d6bf50715e15..0d83e69f2f679988d56920c16acb659d2d1ba636 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -143,30 +143,39 @@ EXPORT_SYMBOL_NS_GPL(mq_attach, "NET_SCHED_INTERNAL");
 void mq_dump_common(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct net_device *dev = qdisc_dev(sch);
+	struct gnet_stats_queue qstats = { 0 };
+	struct gnet_stats bstats = { 0 };
+	const struct Qdisc *qdisc;
 	unsigned int qlen = 0;
-	struct Qdisc *qdisc;
 	unsigned int ntx;
 
-	gnet_stats_basic_sync_init(&sch->bstats);
-	memset(&sch->qstats, 0, sizeof(sch->qstats));
-
 	/* MQ supports lockless qdiscs. However, statistics accounting needs
 	 * to account for all, none, or a mix of locked and unlocked child
 	 * qdiscs. Percpu stats are added to counters in-band and locking
 	 * qdisc totals are added at end.
 	 */
+	rcu_read_lock();
 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
-		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
-		spin_lock_bh(qdisc_lock(qdisc));
+		qdisc = rcu_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
 
-		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
-				     &qdisc->bstats, false);
-		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
+		gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
+				     &qdisc->bstats, true);
+		gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
 				     &qdisc->qstats);
-		qlen += qdisc_qlen(qdisc);
-
-		spin_unlock_bh(qdisc_lock(qdisc));
+		qlen += qdisc_qlen_lockless(qdisc);
 	}
+	rcu_read_unlock();
+
+	spin_lock_bh(qdisc_lock(sch));
+	_bstats_set(&sch->bstats, bstats.bytes, bstats.packets);
+	spin_unlock_bh(qdisc_lock(sch));
+
+	WRITE_ONCE(sch->qstats.qlen, qstats.qlen);
+	WRITE_ONCE(sch->qstats.backlog, qstats.backlog);
+	WRITE_ONCE(sch->qstats.drops, qstats.drops);
+	WRITE_ONCE(sch->qstats.requeues, qstats.requeues);
+	WRITE_ONCE(sch->qstats.overlimits, qstats.overlimits);
+
 	WRITE_ONCE(sch->q.qlen, qlen);
 }
 EXPORT_SYMBOL_NS_GPL(mq_dump_common, "NET_SCHED_INTERNAL");
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 91a92992cd24ab6c30bf7db2288c08cd493c7bc3..0f58b3a3e99a100df929de110fe0bda1a44cc7d6 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -554,32 +554,40 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct net_device *dev = qdisc_dev(sch);
 	struct mqprio_sched *priv = qdisc_priv(sch);
 	struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
+	struct gnet_stats_queue qstats = { 0 };
 	struct tc_mqprio_qopt opt = { 0 };
+	struct gnet_stats bstats = { 0 };
+	const struct Qdisc *qdisc;
 	unsigned int qlen = 0;
-	struct Qdisc *qdisc;
 	unsigned int ntx;
 
-	qlen = 0;
-	gnet_stats_basic_sync_init(&sch->bstats);
-	memset(&sch->qstats, 0, sizeof(sch->qstats));
-
 	/* MQ supports lockless qdiscs. However, statistics accounting needs
 	 * to account for all, none, or a mix of locked and unlocked child
 	 * qdiscs. Percpu stats are added to counters in-band and locking
 	 * qdisc totals are added at end.
 	 */
+	rcu_read_lock();
 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
-		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
-		spin_lock_bh(qdisc_lock(qdisc));
+		qdisc = rcu_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
 
-		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
-				     &qdisc->bstats, false);
-		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
+		gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
+				     &qdisc->bstats, true);
+		gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
 				     &qdisc->qstats);
-		qlen += qdisc_qlen(qdisc);
-
-		spin_unlock_bh(qdisc_lock(qdisc));
+		qlen += qdisc_qlen_lockless(qdisc);
 	}
+	rcu_read_unlock();
+
+	spin_lock_bh(qdisc_lock(sch));
+	_bstats_set(&sch->bstats, bstats.bytes, bstats.packets);
+	spin_unlock_bh(qdisc_lock(sch));
+
+	WRITE_ONCE(sch->qstats.qlen, qstats.qlen);
+	WRITE_ONCE(sch->qstats.backlog, qstats.backlog);
+	WRITE_ONCE(sch->qstats.drops, qstats.drops);
+	WRITE_ONCE(sch->qstats.requeues, qstats.requeues);
+	WRITE_ONCE(sch->qstats.overlimits, qstats.overlimits);
+
 	WRITE_ONCE(sch->q.qlen, qlen);
 
 	mqprio_qopt_reconstruct(dev, &opt);
@@ -661,45 +669,34 @@ static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
 
 static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 				   struct gnet_dump *d)
-	__releases(d->lock)
-	__acquires(d->lock)
 {
 	if (cl >= TC_H_MIN_PRIORITY) {
 		struct net_device *dev = qdisc_dev(sch);
 		struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
-		struct gnet_stats_queue qstats = {0};
+		struct gnet_stats_queue qstats = { 0 };
 		struct gnet_stats_basic_sync bstats;
+		struct gnet_stats _bstats = { 0 };
 		u32 qlen = 0;
 		int i;
 
-		gnet_stats_basic_sync_init(&bstats);
-		/* Drop lock here it will be reclaimed before touching
-		 * statistics this is required because the d->lock we
-		 * hold here is the look on dev_queue->qdisc_sleeping
-		 * also acquired below.
-		 */
-		if (d->lock)
-			spin_unlock_bh(d->lock);
-
+		rcu_read_lock();
 		for (i = tc.offset; i < tc.offset + tc.count; i++) {
-			struct netdev_queue *q = netdev_get_tx_queue(dev, i);
-			struct Qdisc *qdisc = rtnl_dereference(q->qdisc);
-
-			spin_lock_bh(qdisc_lock(qdisc));
+			const struct netdev_queue *q = netdev_get_tx_queue(dev, i);
+			const struct Qdisc *qdisc = rcu_dereference(q->qdisc);
 
-			gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
-					     &qdisc->bstats, false);
+			gnet_stats_add_basic(&_bstats, qdisc->cpu_bstats,
+					     &qdisc->bstats, true);
 			gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
 					     &qdisc->qstats);
-			qlen += qdisc_qlen(qdisc);
-
-			spin_unlock_bh(qdisc_lock(qdisc));
+			qlen += qdisc_qlen_lockless(qdisc);
 		}
+		rcu_read_unlock();
+		u64_stats_init(&bstats.syncp);
+		u64_stats_set(&bstats.bytes, _bstats.bytes);
+		u64_stats_set(&bstats.packets, _bstats.packets);
+
 		qlen = qlen + qstats.qlen;
 
-		/* Reclaim root sleeping lock before completing stats */
-		if (d->lock)
-			spin_lock_bh(d->lock);
 		if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 ||
 		    gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
 			return -1;
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* [PATCH v3 net-next 12/15] net/sched: sch_choke: annotate data-races in choke_dump_stats()
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

choke_dump_stats() only runs with RTNL held,
reading fields that can be changed in qdisc fast path.

Add READ_ONCE()/WRITE_ONCE() annotations.

Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_choke.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index cd0785ad8e74314e6d5c88144ffcf64f286e02dd..73d3e673dc7b16cf2b9ac1d622da280c2ceb064a 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -229,7 +229,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 
 		/* Draw a packet at random from queue and compare flow */
 		if (choke_match_random(q, skb, &idx)) {
-			q->stats.matched++;
+			WRITE_ONCE(q->stats.matched, q->stats.matched + 1);
 			choke_drop_by_idx(sch, idx, to_free);
 			goto congestion_drop;
 		}
@@ -241,11 +241,13 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			qdisc_qstats_overlimit(sch);
 			if (use_harddrop(q) || !use_ecn(q) ||
 			    !INET_ECN_set_ce(skb)) {
-				q->stats.forced_drop++;
+				WRITE_ONCE(q->stats.forced_drop,
+					   q->stats.forced_drop + 1);
 				goto congestion_drop;
 			}
 
-			q->stats.forced_mark++;
+			WRITE_ONCE(q->stats.forced_mark,
+				   q->stats.forced_mark + 1);
 		} else if (++q->vars.qcount) {
 			if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
 				q->vars.qcount = 0;
@@ -253,11 +255,13 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 
 				qdisc_qstats_overlimit(sch);
 				if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
-					q->stats.prob_drop++;
+					WRITE_ONCE(q->stats.prob_drop,
+					           q->stats.prob_drop + 1);
 					goto congestion_drop;
 				}
 
-				q->stats.prob_mark++;
+				WRITE_ONCE(q->stats.prob_mark,
+					   q->stats.prob_mark + 1);
 			}
 		} else
 			q->vars.qR = red_random(p);
@@ -272,7 +276,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		return NET_XMIT_SUCCESS;
 	}
 
-	q->stats.pdrop++;
+	WRITE_ONCE(q->stats.pdrop, q->stats.pdrop + 1);
 	return qdisc_drop(skb, sch, to_free);
 
 congestion_drop:
@@ -461,10 +465,12 @@ static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
 	struct choke_sched_data *q = qdisc_priv(sch);
 	struct tc_choke_xstats st = {
-		.early	= q->stats.prob_drop + q->stats.forced_drop,
-		.marked	= q->stats.prob_mark + q->stats.forced_mark,
-		.pdrop	= q->stats.pdrop,
-		.matched = q->stats.matched,
+		.early	= READ_ONCE(q->stats.prob_drop) +
+			  READ_ONCE(q->stats.forced_drop),
+		.marked	= READ_ONCE(q->stats.prob_mark) +
+			  READ_ONCE(q->stats.forced_mark),
+		.pdrop	= READ_ONCE(q->stats.pdrop),
+		.matched = READ_ONCE(q->stats.matched),
 	};
 
 	return gnet_stats_copy_app(d, &st, sizeof(st));
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* [PATCH v3 net-next 11/15] net_sched: sch_hhf: annotate data-races in hhf_dump_stats()
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

hhf_dump_stats() runs with only RTNL held,
reading fields that can be changed concurrently in the qdisc fast path.

Add READ_ONCE()/WRITE_ONCE() annotations.

Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_hhf.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 69b6f0a5471cb9a3b7b760144683f2b249091d89..1e25b75daae2e5de31bd212dfa1f6d7aea927174 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -198,7 +198,8 @@ static struct hh_flow_state *seek_list(const u32 hash,
 				return NULL;
 			list_del(&flow->flowchain);
 			kfree(flow);
-			q->hh_flows_current_cnt--;
+			WRITE_ONCE(q->hh_flows_current_cnt,
+				   q->hh_flows_current_cnt - 1);
 		} else if (flow->hash_id == hash) {
 			return flow;
 		}
@@ -226,7 +227,7 @@ static struct hh_flow_state *alloc_new_hh(struct list_head *head,
 	}
 
 	if (q->hh_flows_current_cnt >= q->hh_flows_limit) {
-		q->hh_flows_overlimit++;
+		WRITE_ONCE(q->hh_flows_overlimit, q->hh_flows_overlimit + 1);
 		return NULL;
 	}
 	/* Create new entry. */
@@ -234,7 +235,7 @@ static struct hh_flow_state *alloc_new_hh(struct list_head *head,
 	if (!flow)
 		return NULL;
 
-	q->hh_flows_current_cnt++;
+	WRITE_ONCE(q->hh_flows_current_cnt, q->hh_flows_current_cnt + 1);
 	INIT_LIST_HEAD(&flow->flowchain);
 	list_add_tail(&flow->flowchain, head);
 
@@ -309,7 +310,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
 			return WDRR_BUCKET_FOR_NON_HH;
 		flow->hash_id = hash;
 		flow->hit_timestamp = now;
-		q->hh_flows_total_cnt++;
+		WRITE_ONCE(q->hh_flows_total_cnt, q->hh_flows_total_cnt + 1);
 
 		/* By returning without updating counters in q->hhf_arrays,
 		 * we implicitly implement "shielding" (see Optimization O1).
@@ -404,7 +405,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		return NET_XMIT_SUCCESS;
 
 	prev_backlog = sch->qstats.backlog;
-	q->drop_overlimit++;
+	WRITE_ONCE(q->drop_overlimit, q->drop_overlimit + 1);
 	/* Return Congestion Notification only if we dropped a packet from this
 	 * bucket.
 	 */
@@ -687,10 +688,10 @@ static int hhf_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
 	struct hhf_sched_data *q = qdisc_priv(sch);
 	struct tc_hhf_xstats st = {
-		.drop_overlimit = q->drop_overlimit,
-		.hh_overlimit	= q->hh_flows_overlimit,
-		.hh_tot_count	= q->hh_flows_total_cnt,
-		.hh_cur_count	= q->hh_flows_current_cnt,
+		.drop_overlimit = READ_ONCE(q->drop_overlimit),
+		.hh_overlimit	= READ_ONCE(q->hh_flows_overlimit),
+		.hh_tot_count	= READ_ONCE(q->hh_flows_total_cnt),
+		.hh_cur_count	= READ_ONCE(q->hh_flows_current_cnt),
 	};
 
 	return gnet_stats_copy_app(d, &st, sizeof(st));
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* [PATCH v3 net-next 08/15] net/sched: sch_fq_codel: remove data-races from fq_codel_dump_stats()
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

fq_codel_dump_stats() acquires the qdisc spinlock a bit too late.

Move this acquisition before we fill st.qdisc_stats with live data.

Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_fq_codel.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 3a348be18551033dcf41ce632eb4b563221040fa..95769b19a04fb392e14a48353e94fb2ee299565a 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -586,6 +586,8 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 	};
 	struct list_head *pos;
 
+	sch_tree_lock(sch);
+
 	st.qdisc_stats.maxpacket = q->cstats.maxpacket;
 	st.qdisc_stats.drop_overlimit = q->drop_overlimit;
 	st.qdisc_stats.ecn_mark = q->cstats.ecn_mark;
@@ -594,7 +596,6 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 	st.qdisc_stats.memory_usage  = q->memory_usage;
 	st.qdisc_stats.drop_overmemory = q->drop_overmemory;
 
-	sch_tree_lock(sch);
 	list_for_each(pos, &q->new_flows)
 		st.qdisc_stats.new_flows_len++;
 
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* [PATCH v3 net-next 07/15] net/sched: sch_red: annotate data-races in red_dump_stats()
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

red_dump_stats() runs with only RTNL held,
reading fields that can be changed concurrently in the qdisc fast path.

Add READ_ONCE()/WRITE_ONCE() annotations.

An alternative would be to acquire the qdisc spinlock, but our long-term
goal is to make qdisc dump operations lockless as much as we can.

tc_red_xstats fields don't need to be latched atomically,
otherwise this bug would have been caught earlier.

Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_red.c | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 7db97c96351309bc3e64fa50570a1928f2b2ce55..268f1ba4520cd74da60e7b1ca08974fcfdced680 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -90,17 +90,20 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	case RED_PROB_MARK:
 		qdisc_qstats_overlimit(sch);
 		if (!red_use_ecn(q)) {
-			q->stats.prob_drop++;
+			WRITE_ONCE(q->stats.prob_drop,
+				   q->stats.prob_drop + 1);
 			goto congestion_drop;
 		}
 
 		if (INET_ECN_set_ce(skb)) {
-			q->stats.prob_mark++;
+			WRITE_ONCE(q->stats.prob_mark,
+				   q->stats.prob_mark + 1);
 			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
 			if (!skb)
 				return NET_XMIT_CN | ret;
 		} else if (!red_use_nodrop(q)) {
-			q->stats.prob_drop++;
+			WRITE_ONCE(q->stats.prob_drop,
+				   q->stats.prob_drop + 1);
 			goto congestion_drop;
 		}
 
@@ -111,17 +114,20 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		reason = QDISC_DROP_OVERLIMIT;
 		qdisc_qstats_overlimit(sch);
 		if (red_use_harddrop(q) || !red_use_ecn(q)) {
-			q->stats.forced_drop++;
+			WRITE_ONCE(q->stats.forced_drop,
+				   q->stats.forced_drop + 1);
 			goto congestion_drop;
 		}
 
 		if (INET_ECN_set_ce(skb)) {
-			q->stats.forced_mark++;
+			WRITE_ONCE(q->stats.forced_mark,
+				   q->stats.forced_mark + 1);
 			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
 			if (!skb)
 				return NET_XMIT_CN | ret;
 		} else if (!red_use_nodrop(q)) {
-			q->stats.forced_drop++;
+			WRITE_ONCE(q->stats.forced_drop,
+				   q->stats.forced_drop + 1);
 			goto congestion_drop;
 		}
 
@@ -135,7 +141,8 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		qstats_backlog_add(sch, len);
 		qdisc_qlen_inc(sch);
 	} else if (net_xmit_drop_count(ret)) {
-		q->stats.pdrop++;
+		WRITE_ONCE(q->stats.pdrop,
+			   q->stats.pdrop + 1);
 		qdisc_qstats_drop(sch);
 	}
 	return ret;
@@ -463,9 +470,13 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
 					      &hw_stats_request);
 	}
-	st.early = q->stats.prob_drop + q->stats.forced_drop;
-	st.pdrop = q->stats.pdrop;
-	st.marked = q->stats.prob_mark + q->stats.forced_mark;
+	st.early = READ_ONCE(q->stats.prob_drop) +
+		   READ_ONCE(q->stats.forced_drop);
+
+	st.pdrop = READ_ONCE(q->stats.pdrop);
+
+	st.marked = READ_ONCE(q->stats.prob_mark) +
+		    READ_ONCE(q->stats.forced_mark);
 
 	return gnet_stats_copy_app(d, &st, sizeof(st));
 }
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox