* [PATCH net-next 1/2] net: macb: implement ethtool_ops.get|set_channels()
2026-03-05 17:20 [PATCH net-next 0/2] net: macb: implement ethtool set channels count operation Théo Lebrun
@ 2026-03-05 17:20 ` Théo Lebrun
2026-03-07 3:09 ` Jakub Kicinski
2026-03-05 17:20 ` [PATCH net-next 2/2] net: macb: distribute evenly Tx SRAM segments Théo Lebrun
1 sibling, 1 reply; 8+ messages in thread
From: Théo Lebrun @ 2026-03-05 17:20 UTC (permalink / raw)
To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: netdev, linux-kernel, Vladimir Kondratiev, Gregory CLEMENT,
Benoît Monin, Tawfik Bayouk, Thomas Petazzoni, Paolo Valerio,
Théo Lebrun
bp->num_queues is the total number of queues and is constant from probe.
Introduce bp->max_num_queues which takes the current role of
bp->num_queues and allow `0 < bp->num_queues <= bp->max_num_queues`.
MACB/GEM does not know about rx/tx specific queues; it only has
combined queues.
Implement .set_channels() operation by wrapping ourselves in
macb_close() and macb_open() calls if interface is running. This
triggers reinit of buffers, which also includes the code that
enables/disables only the queues below bp->num_queues, in
macb_init_buffers().
If reopen fails, then we do one last attempt and reset num_queues to
the previous value and try reopen. We still error out because the
.set_channels() operation failed.
.set_channels() is reserved to devices with MACB_CAPS_QUEUE_DISABLE.
The tieoff workaround would not work as packets would still be routed
into queues with a tieoff descriptor.
Nit: fix an alignment issue inside gem_ethtool_ops which does not
deserve its own patch.
Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
drivers/net/ethernet/cadence/macb.h | 1 +
drivers/net/ethernet/cadence/macb_main.c | 100 ++++++++++++++++++++++++++-----
2 files changed, 85 insertions(+), 16 deletions(-)
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 87414a2ddf6e..30fa65e2bdf2 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1294,6 +1294,7 @@ struct macb {
unsigned int tx_ring_size;
unsigned int num_queues;
+ unsigned int max_num_queues;
struct macb_queue queues[MACB_MAX_QUEUES];
spinlock_t lock;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 17f0ad3d7a09..bac83a2b4c4d 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -466,9 +466,24 @@ static void macb_init_buffers(struct macb *bp)
upper_32_bits(bp->queues[0].tx_ring_dma));
}
- for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
- queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma));
- queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
+ for (q = 0, queue = bp->queues; q < bp->max_num_queues; ++q, ++queue) {
+ if (q < bp->num_queues) {
+ queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma));
+ queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
+ } else {
+ /*
+ * macb_set_channels(), which is the only way of writing
+ * to bp->num_queues, is only allowed if
+ * MACB_CAPS_QUEUE_DISABLE.
+ */
+ queue_writel(queue, RBQP, MACB_BIT(QUEUE_DISABLE));
+
+ /* Disable all interrupts */
+ queue_writel(queue, IDR, -1);
+ queue_readl(queue, ISR);
+ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
+ queue_writel(queue, ISR, -1);
+ }
}
}
@@ -3900,8 +3915,8 @@ static int gem_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
switch (cmd->cmd) {
case ETHTOOL_SRXCLSRLINS:
- if ((cmd->fs.location >= bp->max_tuples)
- || (cmd->fs.ring_cookie >= bp->num_queues)) {
+ if (cmd->fs.location >= bp->max_tuples ||
+ cmd->fs.ring_cookie >= bp->max_num_queues) {
ret = -EINVAL;
break;
}
@@ -3919,6 +3934,54 @@ static int gem_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
return ret;
}
+static void macb_get_channels(struct net_device *netdev,
+ struct ethtool_channels *ch)
+{
+ struct macb *bp = netdev_priv(netdev);
+
+ ch->max_combined = bp->max_num_queues;
+ ch->combined_count = bp->num_queues;
+}
+
+static int macb_set_channels(struct net_device *netdev,
+ struct ethtool_channels *ch)
+{
+ struct macb *bp = netdev_priv(netdev);
+ unsigned int old_count = bp->num_queues;
+ unsigned int count = ch->combined_count;
+ bool running = netif_running(bp->dev);
+ int ret = 0;
+
+ if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE))
+ return -EOPNOTSUPP;
+
+ if (!count || ch->rx_count || ch->tx_count)
+ return -EINVAL;
+
+ if (count > bp->max_num_queues)
+ return -EINVAL;
+
+ if (count == old_count)
+ return 0;
+
+ if (running)
+ macb_close(bp->dev);
+
+ bp->num_queues = count;
+ netif_set_real_num_queues(bp->dev, count, count);
+
+ if (running) {
+ ret = macb_open(bp->dev);
+ if (ret) {
+ bp->num_queues = old_count;
+ netif_set_real_num_queues(bp->dev, old_count, old_count);
+ macb_open(bp->dev);
+ }
+ }
+
+ return ret;
+}
+
static const struct ethtool_ops macb_ethtool_ops = {
.get_regs_len = macb_get_regs_len,
.get_regs = macb_get_regs,
@@ -3934,6 +3997,8 @@ static const struct ethtool_ops macb_ethtool_ops = {
.set_link_ksettings = macb_set_link_ksettings,
.get_ringparam = macb_get_ringparam,
.set_ringparam = macb_set_ringparam,
+ .get_channels = macb_get_channels,
+ .set_channels = macb_set_channels,
};
static const struct ethtool_ops gem_ethtool_ops = {
@@ -3954,10 +4019,12 @@ static const struct ethtool_ops gem_ethtool_ops = {
.set_link_ksettings = macb_set_link_ksettings,
.get_ringparam = macb_get_ringparam,
.set_ringparam = macb_set_ringparam,
- .get_rxnfc = gem_get_rxnfc,
- .set_rxnfc = gem_set_rxnfc,
- .get_rx_ring_count = gem_get_rx_ring_count,
- .nway_reset = phy_ethtool_nway_reset,
+ .get_rxnfc = gem_get_rxnfc,
+ .set_rxnfc = gem_set_rxnfc,
+ .get_rx_ring_count = gem_get_rx_ring_count,
+ .nway_reset = phy_ethtool_nway_reset,
+ .get_channels = macb_get_channels,
+ .set_channels = macb_set_channels,
};
static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
@@ -4098,9 +4165,9 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
size_t i;
int err;
- if (conf->num_entries > bp->num_queues) {
+ if (conf->num_entries > bp->max_num_queues) {
netdev_err(ndev, "Too many TAPRIO entries: %zu > %d queues\n",
- conf->num_entries, bp->num_queues);
+ conf->num_entries, bp->max_num_queues);
return -EINVAL;
}
@@ -4148,9 +4215,9 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
/* gate_mask must not select queues outside the valid queues */
queue_id = order_base_2(entry->gate_mask);
- if (queue_id >= bp->num_queues) {
+ if (queue_id >= bp->max_num_queues) {
netdev_err(ndev, "Entry %zu: gate_mask 0x%x exceeds queue range (max_queues=%d)\n",
- i, entry->gate_mask, bp->num_queues);
+ i, entry->gate_mask, bp->max_num_queues);
err = -EINVAL;
goto cleanup;
}
@@ -4210,7 +4277,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
/* All validations passed - proceed with hardware configuration */
scoped_guard(spinlock_irqsave, &bp->lock) {
/* Disable ENST queues if running before configuring */
- queue_mask = BIT_U32(bp->num_queues) - 1;
+ queue_mask = BIT_U32(bp->max_num_queues) - 1;
gem_writel(bp, ENST_CONTROL,
queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET);
@@ -4245,7 +4312,7 @@ static void macb_taprio_destroy(struct net_device *ndev)
unsigned int q;
netdev_reset_tc(ndev);
- queue_mask = BIT_U32(bp->num_queues) - 1;
+ queue_mask = BIT_U32(bp->max_num_queues) - 1;
scoped_guard(spinlock_irqsave, &bp->lock) {
/* Single disable command for all queues */
@@ -4253,7 +4320,7 @@ static void macb_taprio_destroy(struct net_device *ndev)
queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET);
/* Clear all queue ENST registers in batch */
- for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+ for (q = 0, queue = bp->queues; q < bp->max_num_queues; ++q, ++queue) {
queue_writel(queue, ENST_START_TIME, 0);
queue_writel(queue, ENST_ON_TIME, 0);
queue_writel(queue, ENST_OFF_TIME, 0);
@@ -5512,6 +5579,7 @@ static int macb_probe(struct platform_device *pdev)
bp->macb_reg_writel = hw_writel;
}
bp->num_queues = num_queues;
+ bp->max_num_queues = num_queues;
bp->dma_burst_length = macb_config->dma_burst_length;
bp->pclk = pclk;
bp->hclk = hclk;
--
2.53.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH net-next 2/2] net: macb: distribute evenly Tx SRAM segments
2026-03-05 17:20 [PATCH net-next 0/2] net: macb: implement ethtool set channels count operation Théo Lebrun
2026-03-05 17:20 ` [PATCH net-next 1/2] net: macb: implement ethtool_ops.get|set_channels() Théo Lebrun
@ 2026-03-05 17:20 ` Théo Lebrun
2026-03-07 3:07 ` Jakub Kicinski
1 sibling, 1 reply; 8+ messages in thread
From: Théo Lebrun @ 2026-03-05 17:20 UTC (permalink / raw)
To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: netdev, linux-kernel, Vladimir Kondratiev, Gregory CLEMENT,
Benoît Monin, Tawfik Bayouk, Thomas Petazzoni, Paolo Valerio,
Théo Lebrun
GEM has registers to configure the Tx SRAM segments distribution across
queues. The reset value is appropriate (even spread) but we need to
take care if/when the number of active queues is modified (or if we
inherited unevenly initialised hardware from the bootloader).
To distribute segments, we take as input the number of queues
(bp->num_queues) and the number of segments (found inside DCFG6).
The output is a number of segments for each queue, formatted as
powers-of-two (e.g. 2 for queue 0 means it has 2^2=4 segments).
As the distribution logic is quite complex (at least its initial
versions had bugs), it is kunit-tested and those tests live at the end
of macb_main.c. To test:
⟩ env --unset=CROSS_COMPILE make ARCH=um mrproper
⟩ env --unset=CROSS_COMPILE ./tools/testing/kunit/kunit.py run \
--kconfig_add CONFIG_NET=y \
--kconfig_add CONFIG_COMMON_CLK=y \
--kconfig_add CONFIG_MACB=y 'macb*'
Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
drivers/net/ethernet/cadence/Kconfig | 6 ++
drivers/net/ethernet/cadence/macb.h | 5 ++
drivers/net/ethernet/cadence/macb_main.c | 135 +++++++++++++++++++++++++++++++
3 files changed, 146 insertions(+)
diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig
index 5b2a461dfd28..3ae7123352f5 100644
--- a/drivers/net/ethernet/cadence/Kconfig
+++ b/drivers/net/ethernet/cadence/Kconfig
@@ -51,4 +51,10 @@ config MACB_PCI
To compile this driver as a module, choose M here: the module
will be called macb_pci.
+config MACB_KUNIT_TEST
+ bool "KUnit test for MACB" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ depends on MACB
+ default KUNIT_ALL_TESTS
+
endif # NET_VENDOR_CADENCE
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 30fa65e2bdf2..81fdd17b34db 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -193,6 +193,9 @@
#define GEM_TXBDCTRL 0x04cc /* TX Buffer Descriptor control register */
#define GEM_RXBDCTRL 0x04d0 /* RX Buffer Descriptor control register */
+#define GEM_TXQSEGALLOC_LOWER 0x05A0 /* Tx queue segment allocation (low) */
+#define GEM_TXQSEGALLOC_UPPER 0x05A4 /* Tx queue segment allocation (high) */
+
/* Screener Type 2 match registers */
#define GEM_SCRT2 0x540
@@ -543,6 +546,8 @@
#define GEM_PBUF_CUTTHRU_SIZE 1
#define GEM_DAW64_OFFSET 23
#define GEM_DAW64_SIZE 1
+#define GEM_SEGMENTS_BIT_SIZE_OFFSET 16
+#define GEM_SEGMENTS_BIT_SIZE_SIZE 3
/* Bitfields in DCFG8. */
#define GEM_T1SCR_OFFSET 24
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index bac83a2b4c4d..022577756eab 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -2751,6 +2751,59 @@ static u32 macb_dbw(struct macb *bp)
}
}
+/*
+ * Distribute evenly available segments across queues. The computation is
+ * complex because (1) segments are counted in powers of two and (2) a queue
+ * can only use up to 8 segments. There are four types of cases:
+ * - Sharing all segments equally is doable. Take num_queues=4 and
+ * num_segments=16. Each queue will get 2^2=4 segments.
+ * - Sharing all segments is doable. Take num_queues=5 and num_segments=16.
+ * Three queues will get 2^2=4 segments and two will get 2^1=2 segments.
+ * - Sharing all segments is not doable because not enough queues are
+ * available. Take num_queues=1 and num_segments=16; queue 0 can only have 8
+ * segments.
+ * - Sharing all segments is not doable because not enough segments are
+ * available. Take num_queues=4 and num_segments=2.
+ *
+ * We start by computing the power each queue will have. For num_queues=5 and
+ * num_segments=16, each queue will have at least 2^1 segments. That leaves us
+ * with remaining_segments=6. If we increase the power for a queue, we get a
+ * delta of 2 (2^2-2^1). The first three queues will therefore be advantaged
+ * and each have 2^2 segments. The remaining 2 queues will only have 2^1
+ * segments.
+ */
+static u64 gem_sram_distribute_segments(unsigned int num_queues,
+ unsigned int num_segments)
+{
+ unsigned int pow, remaining_segments, i;
+ unsigned int num_advantaged_queues = 0;
+ u64 val = 0;
+
+ /* pow=0 for all queues. ilog2(0) is dangerous. */
+ if (num_queues >= num_segments)
+ return 0;
+
+ pow = min(ilog2(num_segments / num_queues), 3);
+ remaining_segments = num_segments - num_queues * (1U << pow);
+
+ /*
+ * We can only distribute remaining segments if (1) there are remaining
+ * segments and (2) we did not reach the max segments per queue (2^3).
+ */
+ if (remaining_segments != 0 && pow != 3) {
+ unsigned int delta = (1U << (pow + 1)) - (1U << pow);
+
+ num_advantaged_queues = remaining_segments / delta;
+ }
+
+ for (i = 0; i < num_advantaged_queues; i++)
+ val |= (u64)((pow + 1) & 0b11) << (i * 4);
+ for (i = num_advantaged_queues; i < num_queues; i++)
+ val |= (u64)(pow & 0b11) << (i * 4);
+
+ return val;
+}
+
/* Configure the receive DMA engine
* - use the correct receive buffer size
* - set best burst length for DMA operations
@@ -2832,6 +2885,19 @@ static void macb_init_hw(struct macb *bp)
if (bp->caps & MACB_CAPS_JUMBO)
bp->rx_frm_len_mask = MACB_RX_JFRMLEN_MASK;
+ /*
+ * Distribute Tx SRAM segments evenly based on active number of queues.
+ */
+ if (macb_is_gem(bp)) {
+ unsigned int num_segments;
+ u64 val;
+
+ num_segments = 1U << GEM_BFEXT(SEGMENTS_BIT_SIZE, gem_readl(bp, DCFG6));
+ val = gem_sram_distribute_segments(bp->num_queues, num_segments);
+ gem_writel(bp, TXQSEGALLOC_LOWER, val);
+ gem_writel(bp, TXQSEGALLOC_UPPER, val >> 32);
+ }
+
macb_configure_dma(bp);
/* Enable RX partial store and forward and set watermark */
@@ -6031,3 +6097,72 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Cadence MACB/GEM Ethernet driver");
MODULE_AUTHOR("Haavard Skinnemoen (Atmel)");
MODULE_ALIAS("platform:macb");
+
+#ifdef CONFIG_MACB_KUNIT_TEST
+#include <kunit/test.h>
+
+struct macb_sram_segments_case {
+ unsigned int num_queues, num_segments;
+};
+
+static void macb_sram_segments_test(struct kunit *test)
+{
+ const struct macb_sram_segments_case *p = test->param_value;
+ u64 val = gem_sram_distribute_segments(p->num_queues, p->num_segments);
+ unsigned int i, sum_segments = 0, max_assigned_segments;
+ unsigned int num_queues = min(p->num_queues, p->num_segments);
+
+ for (i = 0; i < num_queues; i++) {
+ unsigned int q_segments = (val >> (i * 4)) & 0b11;
+
+ q_segments = 1U << q_segments;
+ sum_segments += q_segments;
+ KUNIT_ASSERT_GT_MSG(test, q_segments, 0, "queue %d, val %#llx", i, val);
+ }
+
+ for (i = num_queues; i < 16; i++) {
+ unsigned int pow = (val >> (i * 4)) & 0b11;
+
+ KUNIT_ASSERT_EQ_MSG(test, pow, 0, "queue %d, val %#llx", i, val);
+ }
+
+ max_assigned_segments = min(p->num_segments, 8 * p->num_queues);
+ KUNIT_ASSERT_EQ_MSG(test, sum_segments, max_assigned_segments, "val %#llx", val);
+}
+
+struct macb_sram_segments_case macb_sram_segments_cases[] = {
+ /* num_segments can only be powers of two. */
+ { .num_queues = 4, .num_segments = 2 },
+ { .num_queues = 1, .num_segments = 16 },
+ { .num_queues = 4, .num_segments = 16 },
+ { .num_queues = 5, .num_segments = 16 },
+ { .num_queues = 15, .num_segments = 16 },
+ { .num_queues = 16, .num_segments = 16 },
+};
+
+static void macb_sram_segments_case_desc(struct macb_sram_segments_case *t, char *desc)
+{
+ u64 val = gem_sram_distribute_segments(t->num_queues, t->num_segments);
+
+ snprintf(desc, KUNIT_PARAM_DESC_SIZE,
+ "num_queues=%d num_segments=%d TXQSEGALLOC=%#llx",
+ t->num_queues, t->num_segments, val);
+}
+
+KUNIT_ARRAY_PARAM(macb_sram_segments,
+ macb_sram_segments_cases,
+ macb_sram_segments_case_desc);
+
+static struct kunit_case macb_test_cases[] = {
+ KUNIT_CASE_PARAM(macb_sram_segments_test, macb_sram_segments_gen_params),
+ {}
+};
+
+static struct kunit_suite macb_test_suite = {
+ .name = "macb",
+ .test_cases = macb_test_cases,
+};
+
+kunit_test_suite(macb_test_suite);
+
+#endif
--
2.53.0
^ permalink raw reply related [flat|nested] 8+ messages in thread