The Linux Kernel Mailing List
 help / color / mirror / Atom feed
From: "Théo Lebrun" <theo.lebrun@bootlin.com>
To: "Théo Lebrun" <theo.lebrun@bootlin.com>,
	"Conor Dooley" <conor.dooley@microchip.com>,
	"Andrew Lunn" <andrew+netdev@lunn.ch>,
	"David S. Miller" <davem@davemloft.net>,
	"Eric Dumazet" <edumazet@google.com>,
	"Jakub Kicinski" <kuba@kernel.org>,
	"Paolo Abeni" <pabeni@redhat.com>,
	"Richard Cochran" <richardcochran@gmail.com>,
	"Russell King" <linux@armlinux.org.uk>
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	"Nicolas Ferre" <nicolas.ferre@microchip.com>,
	"Claudiu Beznea" <claudiu.beznea@tuxon.dev>,
	"Paolo Valerio" <pvalerio@redhat.com>,
	"Nicolai Buchwitz" <nb@tipi-net.de>,
	"Vladimir Kondratiev" <vladimir.kondratiev@mobileye.com>,
	"Gregory CLEMENT" <gregory.clement@bootlin.com>,
	"Benoît Monin" <benoit.monin@bootlin.com>,
	"Tawfik Bayouk" <tawfik.bayouk@mobileye.com>,
	"Thomas Petazzoni" <thomas.petazzoni@bootlin.com>,
	"Maxime Chevallier" <maxime.chevallier@bootlin.com>
Subject: [PATCH net-next v3 14/15] net: macb: use context swapping in .set_ringparam()
Date: Wed, 01 Jul 2026 17:59:17 +0200	[thread overview]
Message-ID: <20260701-macb-context-v3-14-00268d5b1502@bootlin.com> (raw)
In-Reply-To: <20260701-macb-context-v3-0-00268d5b1502@bootlin.com>

ethtool_ops.set_ringparam() is implemented using the primitive close /
update ring size / reopen sequence. This is fragile under memory
pressure: we free our buffers at close and might not be able to allocate
new ones at open. Also, it triggers a slow PHY reinit.

Instead, exploit the new context mechanism and improve our sequence to:
 - allocate a new context (including buffers) first
 - if it fails, early return without any impact to the interface
 - stop interface
 - update global state (bp, netdev, etc)
 - pass buffer pointers to the hardware
 - start interface
 - free old context.

The HW disable sequence is inspired by macb_reset_hw() but avoids
(1) setting NCR bit CLRSTAT and (2) clearing register PBUFRXCUT.

The HW re-enable sequence is inspired by macb_mac_link_up(), skipping
over register writes which would be redundant (because values have not
changed).

The generic context swapping parts are isolated into helper functions
macb_context_swap_start|end(), reusable by other operations (change_mtu,
set_channels, etc).

Introduce a new lock (mac_cfg_lock mutex) to serialise the swap with
phylink MAC callbacks. We deliberately do not stop phylink, as that
would cause a slow PHY retrain. Those callbacks grab phydev->lock if it
exists, so we could imagine grabbing that from the swap op, but
phydev->lock doesn't exist in the SFP case.

AT91 EMAC devices are handled differently as their buffer management is
separate and they don't do NAPI. Rather than implementing context
swapping for them, we refuse the request (-EBUSY) when the interface is
running.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb.h      |   2 +
 drivers/net/ethernet/cadence/macb_main.c | 142 +++++++++++++++++++++++++++++--
 2 files changed, 137 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index db0d0270c88c..630fe92608bc 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1360,6 +1360,8 @@ struct macb {
 	struct macb_queue	queues[MACB_MAX_QUEUES];
 
 	spinlock_t		lock;
+	/* Serializes context swap against phylink MAC callbacks. */
+	struct mutex		mac_cfg_lock;
 	struct clk		*pclk;
 	struct clk		*hclk;
 	struct clk		*tx_clk;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 5a32d5cb759e..f0241ade1beb 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -735,12 +735,16 @@ static void macb_mac_disable_tx_lpi(struct phylink_config *config)
 	struct macb *bp = netdev_priv(netdev);
 	unsigned long flags;
 
+	mutex_lock(&bp->mac_cfg_lock);
+
 	cancel_delayed_work_sync(&bp->tx_lpi_work);
 
 	spin_lock_irqsave(&bp->lock, flags);
 	bp->eee_active = false;
 	macb_tx_lpi_set(bp, false);
 	spin_unlock_irqrestore(&bp->lock, flags);
+
+	mutex_unlock(&bp->mac_cfg_lock);
 }
 
 static int macb_mac_enable_tx_lpi(struct phylink_config *config, u32 timer,
@@ -750,6 +754,8 @@ static int macb_mac_enable_tx_lpi(struct phylink_config *config, u32 timer,
 	struct macb *bp = netdev_priv(netdev);
 	unsigned long flags;
 
+	mutex_lock(&bp->mac_cfg_lock);
+
 	spin_lock_irqsave(&bp->lock, flags);
 	bp->tx_lpi_timer = timer;
 	bp->eee_active = true;
@@ -760,6 +766,8 @@ static int macb_mac_enable_tx_lpi(struct phylink_config *config, u32 timer,
 	 */
 	mod_delayed_work(system_wq, &bp->tx_lpi_work, msecs_to_jiffies(1000));
 
+	mutex_unlock(&bp->mac_cfg_lock);
+
 	return 0;
 }
 
@@ -772,6 +780,7 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode,
 	u32 old_ctrl, ctrl;
 	u32 old_ncr, ncr;
 
+	mutex_lock(&bp->mac_cfg_lock);
 	spin_lock_irqsave(&bp->lock, flags);
 
 	old_ctrl = ctrl = macb_or_gem_readl(bp, NCFGR);
@@ -803,6 +812,7 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode,
 		macb_or_gem_writel(bp, NCR, ncr);
 
 	spin_unlock_irqrestore(&bp->lock, flags);
+	mutex_unlock(&bp->mac_cfg_lock);
 }
 
 static void macb_mac_link_down(struct phylink_config *config, unsigned int mode,
@@ -814,6 +824,8 @@ static void macb_mac_link_down(struct phylink_config *config, unsigned int mode,
 	unsigned int q;
 	u32 ctrl;
 
+	mutex_lock(&bp->mac_cfg_lock);
+
 	if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC))
 		for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
 			queue_writel(queue, IDR,
@@ -824,6 +836,8 @@ static void macb_mac_link_down(struct phylink_config *config, unsigned int mode,
 	macb_writel(bp, NCR, ctrl);
 
 	netif_tx_stop_all_queues(netdev);
+
+	mutex_unlock(&bp->mac_cfg_lock);
 }
 
 /* Use juggling algorithm to left rotate tx ring and tx skb array */
@@ -932,6 +946,7 @@ static void macb_mac_link_up(struct phylink_config *config,
 	unsigned int q;
 	u32 ctrl;
 
+	mutex_lock(&bp->mac_cfg_lock);
 	spin_lock_irqsave(&bp->lock, flags);
 
 	ctrl = macb_or_gem_readl(bp, NCFGR);
@@ -983,6 +998,8 @@ static void macb_mac_link_up(struct phylink_config *config,
 	macb_writel(bp, NCR, ctrl | MACB_BIT(RE) | MACB_BIT(TE));
 
 	netif_tx_wake_all_queues(netdev);
+
+	mutex_unlock(&bp->mac_cfg_lock);
 }
 
 static struct phylink_pcs *macb_mac_select_pcs(struct phylink_config *config,
@@ -3083,6 +3100,107 @@ static void macb_configure_dma(struct macb *bp)
 	}
 }
 
+static void macb_context_swap_start(struct macb *bp)
+{
+	struct macb_queue *queue;
+	unsigned long flags;
+	unsigned int q;
+	u32 ctrl;
+
+	mutex_lock(&bp->mac_cfg_lock);
+
+	/* Mask interrupts before disabling BH features. */
+	spin_lock_irqsave(&bp->lock, flags);
+	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+		queue_writel(queue, IDR, -1);
+		queue_readl(queue, ISR);
+		macb_queue_isr_clear(bp, queue, -1);
+	}
+	spin_unlock_irqrestore(&bp->lock, flags);
+
+	/* Drain BH features. HW is still active and usable at this point. */
+
+	cancel_work_sync(&bp->hresp_err_bh_work);
+	cancel_delayed_work_sync(&bp->tx_lpi_work);
+
+	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+		napi_disable(&queue->napi_rx);
+		napi_disable(&queue->napi_tx);
+		cancel_work_sync(&queue->tx_error_task);
+		netdev_tx_reset_queue(netdev_get_tx_queue(bp->netdev, q));
+	}
+
+	/* Can finally disable software Tx; need to wait until napi_tx and
+	 * tx_error_task cannot be scheduled as either might wakeup Tx.
+	 */
+	netif_tx_disable(bp->netdev);
+
+	spin_lock_irqsave(&bp->lock, flags);
+
+	/* Whether it fails or not we'll disable TE/RE next.
+	 * We were just trying to be nice.
+	 */
+	macb_halt_tx(bp);
+
+	ctrl = macb_readl(bp, NCR);
+	macb_writel(bp, NCR, ctrl & ~(MACB_BIT(RE) | MACB_BIT(TE)));
+
+	macb_writel(bp, TSR, -1);
+	macb_writel(bp, RSR, -1);
+
+	spin_unlock_irqrestore(&bp->lock, flags);
+}
+
+static void macb_context_swap_end(struct macb *bp,
+				  struct macb_context *new_ctx)
+{
+	struct macb_context *old_ctx;
+	struct macb_queue *queue;
+	unsigned long flags;
+	unsigned int q;
+	u32 ctrl;
+
+	lockdep_assert_held(&bp->mac_cfg_lock);
+
+	/* Swap contexts & give buffer pointers to HW. */
+
+	old_ctx = bp->ctx;
+	bp->ctx = new_ctx;
+	macb_init_buffers(bp);
+
+	/* Start NAPI, HW Tx/Rx and software Tx. */
+
+	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+		napi_enable(&queue->napi_rx);
+		napi_enable(&queue->napi_tx);
+	}
+
+	spin_lock_irqsave(&bp->lock, flags);
+
+	macb_configure_dma(bp);
+
+	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+		queue_writel(queue, IER,
+			     bp->rx_intr_mask |
+			     MACB_TX_INT_FLAGS |
+			     MACB_BIT(HRESP));
+	}
+
+	ctrl = macb_readl(bp, NCR);
+	macb_writel(bp, NCR, ctrl | MACB_BIT(RE) | MACB_BIT(TE));
+
+	spin_unlock_irqrestore(&bp->lock, flags);
+
+	mutex_unlock(&bp->mac_cfg_lock);
+
+	netif_tx_start_all_queues(bp->netdev);
+
+	/* Free old context. */
+
+	macb_free(old_ctx);
+	kfree(old_ctx);
+}
+
 static void macb_init_hw(struct macb *bp)
 {
 	u32 config;
@@ -3806,9 +3924,10 @@ static int macb_set_ringparam(struct net_device *netdev,
 			      struct kernel_ethtool_ringparam *kernel_ring,
 			      struct netlink_ext_ack *extack)
 {
+	unsigned int new_rx_size, new_tx_size;
 	struct macb *bp = netdev_priv(netdev);
-	u32 new_rx_size, new_tx_size;
-	unsigned int reset = 0;
+	bool running = netif_running(netdev);
+	struct macb_context *new_ctx;
 
 	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
 		return -EINVAL;
@@ -3827,16 +3946,24 @@ static int macb_set_ringparam(struct net_device *netdev,
 		return 0;
 	}
 
-	if (netif_running(bp->netdev)) {
-		reset = 1;
-		macb_close(bp->netdev);
+	if (running) {
+		/* Context swapping is not supported for AT91. */
+		if (bp->caps & MACB_CAPS_MACB_IS_EMAC)
+			return -EBUSY;
+
+		new_ctx = macb_context_alloc(bp, netdev->mtu,
+					     new_rx_size, new_tx_size);
+		if (IS_ERR(new_ctx))
+			return PTR_ERR(new_ctx);
+
+		macb_context_swap_start(bp);
 	}
 
 	bp->configured_rx_ring_size = new_rx_size;
 	bp->configured_tx_ring_size = new_tx_size;
 
-	if (reset)
-		macb_open(bp->netdev);
+	if (running)
+		macb_context_swap_end(bp, new_ctx);
 
 	return 0;
 }
@@ -6009,6 +6136,7 @@ static int macb_probe(struct platform_device *pdev)
 	}
 	spin_lock_init(&bp->lock);
 	spin_lock_init(&bp->stats_lock);
+	mutex_init(&bp->mac_cfg_lock);
 
 	/* setup capabilities */
 	macb_configure_caps(bp, macb_config);

-- 
2.55.0


  parent reply	other threads:[~2026-07-01 15:59 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-07-01 15:59 [PATCH net-next v3 00/15] net: macb: implement context swapping Théo Lebrun
2026-07-01 15:59 ` [PATCH net-next v3 01/15] net: macb: drop "consistent" from alloc/free function names Théo Lebrun
2026-07-02 10:41   ` Nicolai Buchwitz
2026-07-01 15:59 ` [PATCH net-next v3 02/15] net: macb: unify device pointer naming convention Théo Lebrun
2026-07-01 15:59 ` [PATCH net-next v3 03/15] net: macb: unify variable naming convention in at91ether functions Théo Lebrun
2026-07-02 10:42   ` Nicolai Buchwitz
2026-07-01 15:59 ` [PATCH net-next v3 04/15] net: macb: unify queue index variable naming convention and types Théo Lebrun
2026-07-02 10:43   ` Nicolai Buchwitz
2026-07-01 15:59 ` [PATCH net-next v3 05/15] net: macb: enforce reverse christmas tree (RCT) convention Théo Lebrun
2026-07-02 10:48   ` Nicolai Buchwitz
2026-07-01 15:59 ` [PATCH net-next v3 06/15] net: macb: allocate tieoff descriptor once across device lifetime Théo Lebrun
2026-07-02 10:54   ` Nicolai Buchwitz
2026-07-01 15:59 ` [PATCH net-next v3 07/15] net: macb: introduce macb_context struct for buffer management Théo Lebrun
2026-07-01 15:59 ` [PATCH net-next v3 08/15] net: macb: avoid macb_init_rx_buffer_size() modifying state Théo Lebrun
2026-07-01 15:59 ` [PATCH net-next v3 09/15] net: macb: make `struct macb` subset reachable from macb_context struct Théo Lebrun
2026-07-01 15:59 ` [PATCH net-next v3 10/15] net: macb: change caps helpers signatures Théo Lebrun
2026-07-01 15:59 ` [PATCH net-next v3 11/15] net: macb: change function signatures to take contexts Théo Lebrun
2026-07-01 15:59 ` [PATCH net-next v3 12/15] net: macb: introduce macb_context_alloc() helper Théo Lebrun
2026-07-01 15:59 ` [PATCH net-next v3 13/15] net: macb: re-read ISR inside IRQ handler locked section Théo Lebrun
2026-07-01 15:59 ` Théo Lebrun [this message]
2026-07-02 10:37   ` [PATCH net-next v3 14/15] net: macb: use context swapping in .set_ringparam() Nicolai Buchwitz
2026-07-01 15:59 ` [PATCH net-next v3 15/15] net: macb: use context swapping in .ndo_change_mtu() Théo Lebrun

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260701-macb-context-v3-14-00268d5b1502@bootlin.com \
    --to=theo.lebrun@bootlin.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=benoit.monin@bootlin.com \
    --cc=claudiu.beznea@tuxon.dev \
    --cc=conor.dooley@microchip.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=gregory.clement@bootlin.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@armlinux.org.uk \
    --cc=maxime.chevallier@bootlin.com \
    --cc=nb@tipi-net.de \
    --cc=netdev@vger.kernel.org \
    --cc=nicolas.ferre@microchip.com \
    --cc=pabeni@redhat.com \
    --cc=pvalerio@redhat.com \
    --cc=richardcochran@gmail.com \
    --cc=tawfik.bayouk@mobileye.com \
    --cc=thomas.petazzoni@bootlin.com \
    --cc=vladimir.kondratiev@mobileye.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox