* [PATCH net-next v9 3/7] net: bcmgenet: add basic XDP support (PASS/DROP)
From: Nicolai Buchwitz @ 2026-05-06 9:55 UTC (permalink / raw)
To: netdev
Cc: Justin Chen, Simon Horman, Mohsin Bashir, Doug Berger,
Florian Fainelli, Broadcom internal kernel review list,
Andrew Lunn, Eric Dumazet, Paolo Abeni, Nicolai Buchwitz,
David S. Miller, Jakub Kicinski, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
Stanislav Fomichev, linux-kernel, bpf
In-Reply-To: <20260506095553.55357-1-nb@tipi-net.de>
Add XDP program attachment via ndo_bpf and execute XDP programs in the
RX path. XDP_PASS builds an SKB from the xdp_buff (honoring any
bpf_xdp_adjust_head()/bpf_xdp_adjust_tail() changes and XDP metadata);
XDP_DROP returns the page to the page_pool without allocating an SKB.
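XDP_TX and XDP_REDIRECT are not yet supported: a program returning either
verdict is handled like XDP_ABORTED (the driver warns about the
unsupported action, fires the xdp_exception tracepoint and recycles the
page).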
Advertise NETDEV_XDP_ACT_BASIC in xdp_features.
Signed-off-by: Nicolai Buchwitz <nb@tipi-net.de>
---
.../net/ethernet/broadcom/genet/bcmgenet.c | 129 +++++++++++++++---
.../net/ethernet/broadcom/genet/bcmgenet.h | 4 +
2 files changed, 116 insertions(+), 17 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 5bedc18685b0..ee1d4ecc2b87 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -36,6 +36,8 @@
#include <linux/ipv6.h>
#include <linux/phy.h>
#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <linux/filter.h>
#include <linux/unaligned.h>
@@ -2276,6 +2278,56 @@ static int bcmgenet_rx_refill(struct bcmgenet_rx_ring *ring,
return 0;
}
+static struct sk_buff *bcmgenet_xdp_build_skb(struct bcmgenet_rx_ring *ring,
+ struct xdp_buff *xdp)
+{
+ unsigned int metasize;
+ struct sk_buff *skb;
+
+ skb = napi_build_skb(xdp->data_hard_start, PAGE_SIZE);
+ if (unlikely(!skb))
+ return NULL;
+
+ skb_mark_for_recycle(skb);
+
+ metasize = xdp->data - xdp->data_meta;
+ skb_reserve(skb, xdp->data - xdp->data_hard_start);
+ __skb_put(skb, xdp->data_end - xdp->data);
+
+ if (metasize)
+ skb_metadata_set(skb, metasize);
+
+ return skb;
+}
+
+static unsigned int bcmgenet_run_xdp(struct bcmgenet_rx_ring *ring,
+ struct bpf_prog *prog,
+ struct xdp_buff *xdp,
+ struct page *rx_page)
+{
+ unsigned int act;
+
+ if (!prog)
+ return XDP_PASS;
+
+ act = bpf_prog_run_xdp(prog, xdp);
+
+ switch (act) {
+ case XDP_PASS:
+ return XDP_PASS;
+ case XDP_DROP:
+ page_pool_put_full_page(ring->page_pool, rx_page, true);
+ return XDP_DROP;
+ default:
+ bpf_warn_invalid_xdp_action(ring->priv->dev, prog, act);
+ fallthrough;
+ case XDP_ABORTED:
+ trace_xdp_exception(ring->priv->dev, prog, act);
+ page_pool_put_full_page(ring->page_pool, rx_page, true);
+ return XDP_ABORTED;
+ }
+}
+
/* bcmgenet_desc_rx - descriptor based rx process.
* this could be called from bottom half, or from NAPI polling method.
*/
@@ -2285,6 +2337,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
struct bcmgenet_rx_stats64 *stats = &ring->stats64;
struct bcmgenet_priv *priv = ring->priv;
struct net_device *dev = priv->dev;
+ struct bpf_prog *xdp_prog;
struct enet_cb *cb;
struct sk_buff *skb;
u32 dma_length_status;
@@ -2295,6 +2348,8 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
unsigned int p_index, mask;
unsigned int discards;
+ xdp_prog = READ_ONCE(priv->xdp_prog);
+
/* Clear status before servicing to reduce spurious interrupts */
mask = 1 << (UMAC_IRQ1_RX_INTR_SHIFT + ring->index);
bcmgenet_intrl2_1_writel(priv, mask, INTRL2_CPU_CLEAR);
@@ -2326,9 +2381,12 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
(rxpktprocessed < budget)) {
struct status_64 *status;
struct page *rx_page;
+ unsigned int xdp_act;
unsigned int rx_off;
- __be16 rx_csum;
+ struct xdp_buff xdp;
+ __be16 rx_csum = 0;
void *hard_start;
+ int pkt_len;
cb = &priv->rx_cbs[ring->read_ptr];
@@ -2413,30 +2471,34 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
goto next;
} /* error packet */
- /* Build SKB from the page - data starts at hard_start,
- * frame begins after RSB(64) + pad(2) = 66 bytes.
+ pkt_len = len - GENET_RSB_PAD;
+ if (priv->crc_fwd_en)
+ pkt_len -= ETH_FCS_LEN;
+
+ /* Save rx_csum before XDP runs - an XDP program
+ * could overwrite the RSB via bpf_xdp_adjust_head.
*/
- skb = napi_build_skb(hard_start, PAGE_SIZE - XDP_PACKET_HEADROOM);
- if (unlikely(!skb)) {
- BCMGENET_STATS64_INC(stats, dropped);
- page_pool_put_full_page(ring->page_pool, rx_page,
- true);
- goto next;
- }
+ if (dev->features & NETIF_F_RXCSUM)
+ rx_csum = (__force __be16)(status->rx_csum & 0xffff);
- skb_mark_for_recycle(skb);
+ xdp_init_buff(&xdp, PAGE_SIZE, &ring->xdp_rxq);
+ xdp_prepare_buff(&xdp, page_address(rx_page),
+ GENET_RX_HEADROOM, pkt_len, true);
- /* Reserve the RSB + pad, then set the data length */
- skb_reserve(skb, GENET_RSB_PAD);
- __skb_put(skb, len - GENET_RSB_PAD);
+ xdp_act = bcmgenet_run_xdp(ring, xdp_prog, &xdp, rx_page);
+ if (xdp_act != XDP_PASS)
+ goto next;
- if (priv->crc_fwd_en) {
- skb_trim(skb, skb->len - ETH_FCS_LEN);
+ skb = bcmgenet_xdp_build_skb(ring, &xdp);
+ if (unlikely(!skb)) {
+ BCMGENET_STATS64_INC(stats, dropped);
+ page_pool_put_full_page(ring->page_pool,
+ rx_page, true);
+ goto next;
}
/* Set up checksum offload */
if (dev->features & NETIF_F_RXCSUM) {
- rx_csum = (__force __be16)(status->rx_csum & 0xffff);
if (rx_csum) {
skb->csum = (__force __wsum)ntohs(rx_csum);
skb->ip_summed = CHECKSUM_COMPLETE;
@@ -3750,6 +3812,37 @@ static int bcmgenet_change_carrier(struct net_device *dev, bool new_carrier)
return 0;
}
+static int bcmgenet_xdp_setup(struct net_device *dev,
+ struct netdev_bpf *xdp)
+{
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+ struct bpf_prog *old_prog;
+ struct bpf_prog *prog = xdp->prog;
+
+ if (prog && dev->mtu > PAGE_SIZE - GENET_RX_HEADROOM -
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) {
+ NL_SET_ERR_MSG_MOD(xdp->extack,
+ "MTU too large for single-page XDP buffer");
+ return -EOPNOTSUPP;
+ }
+
+ old_prog = xchg(&priv->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ return 0;
+}
+
+static int bcmgenet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return bcmgenet_xdp_setup(dev, xdp);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct net_device_ops bcmgenet_netdev_ops = {
.ndo_open = bcmgenet_open,
.ndo_stop = bcmgenet_close,
@@ -3761,6 +3854,7 @@ static const struct net_device_ops bcmgenet_netdev_ops = {
.ndo_set_features = bcmgenet_set_features,
.ndo_get_stats64 = bcmgenet_get_stats64,
.ndo_change_carrier = bcmgenet_change_carrier,
+ .ndo_bpf = bcmgenet_xdp,
};
/* GENET hardware parameters/characteristics */
@@ -4063,6 +4157,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
NETIF_F_RXCSUM;
dev->hw_features |= dev->features;
dev->vlan_features |= dev->features;
+ dev->xdp_features = NETDEV_XDP_ACT_BASIC;
netdev_sw_irq_coalesce_default_on(dev);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index da7b7fee896f..3d65f0e4b4b4 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -16,6 +16,7 @@
#include <linux/dim.h>
#include <linux/ethtool.h>
#include <net/page_pool/helpers.h>
+#include <linux/bpf.h>
#include <net/xdp.h>
#include "../unimac.h"
@@ -670,6 +671,9 @@ struct bcmgenet_priv {
u8 sopass[SOPASS_MAX];
struct bcmgenet_mib_counters mib;
+
+ /* XDP */
+ struct bpf_prog *xdp_prog;
};
static inline bool bcmgenet_has_40bits(struct bcmgenet_priv *priv)
--
2.51.0
^ permalink raw reply related
* [PATCH net-next v9 1/7] net: bcmgenet: convert RX path to page_pool
From: Nicolai Buchwitz @ 2026-05-06 9:55 UTC (permalink / raw)
To: netdev
Cc: Justin Chen, Simon Horman, Mohsin Bashir, Doug Berger,
Florian Fainelli, Broadcom internal kernel review list,
Andrew Lunn, Eric Dumazet, Paolo Abeni, Nicolai Buchwitz,
David S. Miller, Jakub Kicinski, Bhargava Marreddy, Vikas Gupta,
Rajashekar Hudumula, Eric Biggers, linux-kernel, bpf
In-Reply-To: <20260506095553.55357-1-nb@tipi-net.de>
Replace the per-packet __netdev_alloc_skb() + dma_map_single() in the
RX path with page_pool, which provides efficient page recycling and
DMA mapping management. This is a prerequisite for XDP support (which
requires stable page-backed buffers rather than SKB linear data).
Key changes:
- Create a page_pool per RX ring (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
- bcmgenet_rx_refill() allocates pages via page_pool_alloc_pages()
- bcmgenet_desc_rx() builds SKBs from pages via napi_build_skb() with
skb_mark_for_recycle() for automatic page_pool return
- Buffer layout reserves XDP_PACKET_HEADROOM (256 bytes) before the HW
RSB (64 bytes) + alignment pad (2 bytes) for future XDP use
Signed-off-by: Nicolai Buchwitz <nb@tipi-net.de>
---
drivers/net/ethernet/broadcom/Kconfig | 1 +
.../net/ethernet/broadcom/genet/bcmgenet.c | 232 +++++++++++-------
.../net/ethernet/broadcom/genet/bcmgenet.h | 5 +-
3 files changed, 154 insertions(+), 84 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 4287edc7ddd6..f0bac0dd1439 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -78,6 +78,7 @@ config BCMGENET
select BCM7XXX_PHY
select MDIO_BCM_UNIMAC
select DIMLIB
+ select PAGE_POOL
select BROADCOM_PHY if ARCH_BCM2835
help
This driver supports the built-in Ethernet MACs found in the
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 54f71b1e85fc..df11c4977e8f 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -35,6 +35,7 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/phy.h>
+#include <linux/bpf.h>
#include <linux/unaligned.h>
@@ -52,6 +53,13 @@
#define RX_BUF_LENGTH 2048
#define SKB_ALIGNMENT 32
+/* Page pool RX buffer layout:
+ * XDP_PACKET_HEADROOM | RSB(64) + pad(2) | frame data | skb_shared_info
+ * The HW writes the 64B RSB + 2B alignment padding before the frame.
+ */
+#define GENET_RSB_PAD (sizeof(struct status_64) + 2)
+#define GENET_RX_HEADROOM (XDP_PACKET_HEADROOM + GENET_RSB_PAD)
+
/* Tx/Rx DMA register offset, skip 256 descriptors */
#define WORDS_PER_BD(p) (p->hw_params->words_per_bd)
#define DMA_DESC_SIZE (WORDS_PER_BD(priv) * sizeof(u32))
@@ -1895,21 +1903,13 @@ static struct sk_buff *bcmgenet_free_tx_cb(struct device *dev,
}
/* Simple helper to free a receive control block's resources */
-static struct sk_buff *bcmgenet_free_rx_cb(struct device *dev,
- struct enet_cb *cb)
+static void bcmgenet_free_rx_cb(struct enet_cb *cb,
+ struct page_pool *pool)
{
- struct sk_buff *skb;
-
- skb = cb->skb;
- cb->skb = NULL;
-
- if (dma_unmap_addr(cb, dma_addr)) {
- dma_unmap_single(dev, dma_unmap_addr(cb, dma_addr),
- dma_unmap_len(cb, dma_len), DMA_FROM_DEVICE);
- dma_unmap_addr_set(cb, dma_addr, 0);
+ if (cb->rx_page) {
+ page_pool_put_full_page(pool, cb->rx_page, false);
+ cb->rx_page = NULL;
}
-
- return skb;
}
/* Unlocked version of the reclaim routine */
@@ -2250,46 +2250,30 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
goto out;
}
-static struct sk_buff *bcmgenet_rx_refill(struct bcmgenet_priv *priv,
- struct enet_cb *cb)
+static int bcmgenet_rx_refill(struct bcmgenet_rx_ring *ring,
+ struct enet_cb *cb)
{
- struct device *kdev = &priv->pdev->dev;
- struct sk_buff *skb;
- struct sk_buff *rx_skb;
+ struct bcmgenet_priv *priv = ring->priv;
dma_addr_t mapping;
+ struct page *page;
- /* Allocate a new Rx skb */
- skb = __netdev_alloc_skb(priv->dev, priv->rx_buf_len + SKB_ALIGNMENT,
- GFP_ATOMIC | __GFP_NOWARN);
- if (!skb) {
+ page = page_pool_alloc_pages(ring->page_pool,
+ GFP_ATOMIC);
+ if (!page) {
priv->mib.alloc_rx_buff_failed++;
netif_err(priv, rx_err, priv->dev,
- "%s: Rx skb allocation failed\n", __func__);
- return NULL;
- }
-
- /* DMA-map the new Rx skb */
- mapping = dma_map_single(kdev, skb->data, priv->rx_buf_len,
- DMA_FROM_DEVICE);
- if (dma_mapping_error(kdev, mapping)) {
- priv->mib.rx_dma_failed++;
- dev_kfree_skb_any(skb);
- netif_err(priv, rx_err, priv->dev,
- "%s: Rx skb DMA mapping failed\n", __func__);
- return NULL;
+ "%s: Rx page allocation failed\n", __func__);
+ return -ENOMEM;
}
- /* Grab the current Rx skb from the ring and DMA-unmap it */
- rx_skb = bcmgenet_free_rx_cb(kdev, cb);
+ /* page_pool handles DMA mapping via PP_FLAG_DMA_MAP */
+ mapping = page_pool_get_dma_addr(page) + XDP_PACKET_HEADROOM;
- /* Put the new Rx skb on the ring */
- cb->skb = skb;
- dma_unmap_addr_set(cb, dma_addr, mapping);
- dma_unmap_len_set(cb, dma_len, priv->rx_buf_len);
+ cb->rx_page = page;
+ cb->rx_page_offset = XDP_PACKET_HEADROOM;
dmadesc_set_addr(priv, cb->bd_addr, mapping);
- /* Return the current Rx skb to caller */
- return rx_skb;
+ return 0;
}
/* bcmgenet_desc_rx - descriptor based rx process.
@@ -2341,25 +2325,29 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
while ((rxpktprocessed < rxpkttoprocess) &&
(rxpktprocessed < budget)) {
struct status_64 *status;
+ struct page *rx_page;
+ unsigned int rx_off;
__be16 rx_csum;
+ void *hard_start;
cb = &priv->rx_cbs[ring->read_ptr];
- skb = bcmgenet_rx_refill(priv, cb);
- if (unlikely(!skb)) {
+ /* Save the received page before refilling */
+ rx_page = cb->rx_page;
+ rx_off = cb->rx_page_offset;
+
+ if (bcmgenet_rx_refill(ring, cb)) {
BCMGENET_STATS64_INC(stats, dropped);
goto next;
}
- status = (struct status_64 *)skb->data;
+ /* Sync the RSB first to read the frame length */
+ page_pool_dma_sync_for_cpu(ring->page_pool, rx_page, 0,
+ sizeof(struct status_64));
+
+ hard_start = page_address(rx_page) + rx_off;
+ status = (struct status_64 *)hard_start;
dma_length_status = status->length_status;
- if (dev->features & NETIF_F_RXCSUM) {
- rx_csum = (__force __be16)(status->rx_csum & 0xffff);
- if (rx_csum) {
- skb->csum = (__force __wsum)ntohs(rx_csum);
- skb->ip_summed = CHECKSUM_COMPLETE;
- }
- }
/* DMA flags and length are still valid no matter how
* we got the Receive Status Vector (64B RSB or register)
@@ -2367,15 +2355,23 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
dma_flag = dma_length_status & 0xffff;
len = dma_length_status >> DMA_BUFLENGTH_SHIFT;
+ /* Sync the rest of the actual received frame */
+ if (len > sizeof(struct status_64))
+ page_pool_dma_sync_for_cpu(ring->page_pool, rx_page,
+ sizeof(struct status_64),
+ len - sizeof(struct status_64));
+
netif_dbg(priv, rx_status, dev,
"%s:p_ind=%d c_ind=%d read_ptr=%d len_stat=0x%08x\n",
__func__, p_index, ring->c_index,
ring->read_ptr, dma_length_status);
- if (unlikely(len > RX_BUF_LENGTH)) {
- netif_err(priv, rx_status, dev, "oversized packet\n");
+ if (unlikely(len > RX_BUF_LENGTH || len < GENET_RSB_PAD)) {
+ netif_err(priv, rx_status, dev,
+ "invalid packet length %d\n", len);
BCMGENET_STATS64_INC(stats, length_errors);
- dev_kfree_skb_any(skb);
+ page_pool_put_full_page(ring->page_pool, rx_page,
+ true);
goto next;
}
@@ -2383,7 +2379,8 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
netif_err(priv, rx_status, dev,
"dropping fragmented packet!\n");
BCMGENET_STATS64_INC(stats, fragmented_errors);
- dev_kfree_skb_any(skb);
+ page_pool_put_full_page(ring->page_pool, rx_page,
+ true);
goto next;
}
@@ -2411,24 +2408,47 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
DMA_RX_RXER)) == DMA_RX_RXER)
u64_stats_inc(&stats->errors);
u64_stats_update_end(&stats->syncp);
- dev_kfree_skb_any(skb);
+ page_pool_put_full_page(ring->page_pool, rx_page,
+ true);
goto next;
} /* error packet */
- skb_put(skb, len);
+ /* Build SKB from the page - data starts at hard_start,
+ * frame begins after RSB(64) + pad(2) = 66 bytes.
+ */
+ skb = napi_build_skb(hard_start, PAGE_SIZE - XDP_PACKET_HEADROOM);
+ if (unlikely(!skb)) {
+ BCMGENET_STATS64_INC(stats, dropped);
+ page_pool_put_full_page(ring->page_pool, rx_page,
+ true);
+ goto next;
+ }
+
+ skb_mark_for_recycle(skb);
- /* remove RSB and hardware 2bytes added for IP alignment */
- skb_pull(skb, 66);
- len -= 66;
+ /* Reserve the RSB + pad, then set the data length */
+ skb_reserve(skb, GENET_RSB_PAD);
+ __skb_put(skb, len - GENET_RSB_PAD);
if (priv->crc_fwd_en) {
- skb_trim(skb, len - ETH_FCS_LEN);
- len -= ETH_FCS_LEN;
+ skb_trim(skb, skb->len - ETH_FCS_LEN);
}
+ /* Set up checksum offload */
+ if (dev->features & NETIF_F_RXCSUM) {
+ rx_csum = (__force __be16)(status->rx_csum & 0xffff);
+ if (rx_csum) {
+ skb->csum = (__force __wsum)ntohs(rx_csum);
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ }
+ }
+
+ len = skb->len;
bytes_processed += len;
- /*Finish setting up the received SKB and send it to the kernel*/
+ /* Finish setting up the received SKB and send it to the
+ * kernel.
+ */
skb->protocol = eth_type_trans(skb, priv->dev);
u64_stats_update_begin(&stats->syncp);
@@ -2497,12 +2517,11 @@ static void bcmgenet_dim_work(struct work_struct *work)
dim->state = DIM_START_MEASURE;
}
-/* Assign skb to RX DMA descriptor. */
+/* Assign page_pool pages to RX DMA descriptors. */
static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv,
struct bcmgenet_rx_ring *ring)
{
struct enet_cb *cb;
- struct sk_buff *skb;
int i;
netif_dbg(priv, hw, priv->dev, "%s\n", __func__);
@@ -2510,10 +2529,7 @@ static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv,
/* loop here for each buffer needing assign */
for (i = 0; i < ring->size; i++) {
cb = ring->cbs + i;
- skb = bcmgenet_rx_refill(priv, cb);
- if (skb)
- dev_consume_skb_any(skb);
- if (!cb->skb)
+ if (bcmgenet_rx_refill(ring, cb))
return -ENOMEM;
}
@@ -2522,16 +2538,18 @@ static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv,
static void bcmgenet_free_rx_buffers(struct bcmgenet_priv *priv)
{
- struct sk_buff *skb;
+ struct bcmgenet_rx_ring *ring;
struct enet_cb *cb;
- int i;
-
- for (i = 0; i < priv->num_rx_bds; i++) {
- cb = &priv->rx_cbs[i];
+ int q, i;
- skb = bcmgenet_free_rx_cb(&priv->pdev->dev, cb);
- if (skb)
- dev_consume_skb_any(skb);
+ for (q = 0; q <= priv->hw_params->rx_queues; q++) {
+ ring = &priv->rx_rings[q];
+ if (!ring->page_pool)
+ continue;
+ for (i = 0; i < ring->size; i++) {
+ cb = ring->cbs + i;
+ bcmgenet_free_rx_cb(cb, ring->page_pool);
+ }
}
}
@@ -2749,6 +2767,31 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
netif_napi_add_tx(priv->dev, &ring->napi, bcmgenet_tx_poll);
}
+static int bcmgenet_rx_ring_create_pool(struct bcmgenet_priv *priv,
+ struct bcmgenet_rx_ring *ring)
+{
+ struct page_pool_params pp_params = {
+ .order = 0,
+ .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+ .pool_size = ring->size,
+ .nid = NUMA_NO_NODE,
+ .dev = &priv->pdev->dev,
+ .dma_dir = DMA_FROM_DEVICE,
+ .offset = XDP_PACKET_HEADROOM,
+ .max_len = RX_BUF_LENGTH,
+ };
+ int err;
+
+ ring->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(ring->page_pool)) {
+ err = PTR_ERR(ring->page_pool);
+ ring->page_pool = NULL;
+ return err;
+ }
+
+ return 0;
+}
+
/* Initialize a RDMA ring */
static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv,
unsigned int index, unsigned int size,
@@ -2756,7 +2799,7 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv,
{
struct bcmgenet_rx_ring *ring = &priv->rx_rings[index];
u32 words_per_bd = WORDS_PER_BD(priv);
- int ret;
+ int ret, i;
ring->priv = priv;
ring->index = index;
@@ -2767,10 +2810,19 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv,
ring->cb_ptr = start_ptr;
ring->end_ptr = end_ptr - 1;
- ret = bcmgenet_alloc_rx_buffers(priv, ring);
+ ret = bcmgenet_rx_ring_create_pool(priv, ring);
if (ret)
return ret;
+ ret = bcmgenet_alloc_rx_buffers(priv, ring);
+ if (ret) {
+ for (i = 0; i < ring->size; i++)
+ bcmgenet_free_rx_cb(ring->cbs + i, ring->page_pool);
+ page_pool_destroy(ring->page_pool);
+ ring->page_pool = NULL;
+ return ret;
+ }
+
bcmgenet_init_dim(ring, bcmgenet_dim_work);
bcmgenet_init_rx_coalesce(ring);
@@ -2963,6 +3015,20 @@ static void bcmgenet_fini_rx_napi(struct bcmgenet_priv *priv)
}
}
+static void bcmgenet_destroy_rx_page_pools(struct bcmgenet_priv *priv)
+{
+ struct bcmgenet_rx_ring *ring;
+ unsigned int i;
+
+ for (i = 0; i <= priv->hw_params->rx_queues; ++i) {
+ ring = &priv->rx_rings[i];
+ if (ring->page_pool) {
+ page_pool_destroy(ring->page_pool);
+ ring->page_pool = NULL;
+ }
+ }
+}
+
/* Initialize Rx queues
*
* Queues 0-15 are priority queues. Hardware Filtering Block (HFB) can be
@@ -3034,6 +3100,7 @@ static void bcmgenet_fini_dma(struct bcmgenet_priv *priv)
}
bcmgenet_free_rx_buffers(priv);
+ bcmgenet_destroy_rx_page_pools(priv);
kfree(priv->rx_cbs);
kfree(priv->tx_cbs);
}
@@ -3110,6 +3177,7 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv, bool flush_rx)
if (ret) {
netdev_err(priv->dev, "failed to initialize Rx queues\n");
bcmgenet_free_rx_buffers(priv);
+ bcmgenet_destroy_rx_page_pools(priv);
kfree(priv->rx_cbs);
kfree(priv->tx_cbs);
return ret;
@@ -4027,8 +4095,6 @@ static int bcmgenet_probe(struct platform_device *pdev)
/* Mii wait queue */
init_waitqueue_head(&priv->wq);
- /* Always use RX_BUF_LENGTH (2KB) buffer for all chips */
- priv->rx_buf_len = RX_BUF_LENGTH;
INIT_WORK(&priv->bcmgenet_irq_work, bcmgenet_irq_task);
priv->clk_wol = devm_clk_get_optional(&priv->pdev->dev, "enet-wol");
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 9e4110c7fdf6..7203bde37b78 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -15,6 +15,7 @@
#include <linux/phy.h>
#include <linux/dim.h>
#include <linux/ethtool.h>
+#include <net/page_pool/helpers.h>
#include "../unimac.h"
@@ -469,6 +470,8 @@ struct bcmgenet_rx_stats64 {
struct enet_cb {
struct sk_buff *skb;
+ struct page *rx_page;
+ unsigned int rx_page_offset;
void __iomem *bd_addr;
DEFINE_DMA_UNMAP_ADDR(dma_addr);
DEFINE_DMA_UNMAP_LEN(dma_len);
@@ -575,6 +578,7 @@ struct bcmgenet_rx_ring {
struct bcmgenet_net_dim dim;
u32 rx_max_coalesced_frames;
u32 rx_coalesce_usecs;
+ struct page_pool *page_pool;
struct bcmgenet_priv *priv;
};
@@ -609,7 +613,6 @@ struct bcmgenet_priv {
void __iomem *rx_bds;
struct enet_cb *rx_cbs;
unsigned int num_rx_bds;
- unsigned int rx_buf_len;
struct bcmgenet_rxnfc_rule rxnfc_rules[MAX_NUM_OF_FS_RULES];
struct list_head rxnfc_list;
--
2.51.0
^ permalink raw reply related
* [PATCH net-next v9 4/7] net: bcmgenet: add XDP_TX support
From: Nicolai Buchwitz @ 2026-05-06 9:55 UTC (permalink / raw)
To: netdev
Cc: Justin Chen, Simon Horman, Mohsin Bashir, Doug Berger,
Florian Fainelli, Broadcom internal kernel review list,
Andrew Lunn, Eric Dumazet, Paolo Abeni, Nicolai Buchwitz,
David S. Miller, Jakub Kicinski, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
Stanislav Fomichev, linux-kernel, bpf
In-Reply-To: <20260506095553.55357-1-nb@tipi-net.de>
Implement XDP_TX using ring 16 (DESC_INDEX), the hardware default
descriptor ring, dedicated to XDP TX for isolation from SKB TX queues.
Ring 16 gets 32 BDs carved from ring 0's allocation. TX completion is
piggybacked on RX NAPI poll since ring 16's INTRL2_1 bit collides with
RX ring 0, similar to how bnxt, ice, and other XDP drivers handle TX
completion within the RX poll path.
The GENET MAC has TBUF_64B_EN set globally, requiring every TX buffer
to start with a 64-byte struct status_64 (TSB). For local XDP_TX, the
TSB is prepended by moving xdp->data back by sizeof(struct status_64)
into the RSB area (unused after BPF execution) and zeroing it. For
foreign frames redirected from other devices, the TSB is written into
the xdp_frame headroom.
The page_pool DMA direction is changed from DMA_FROM_DEVICE to
DMA_BIDIRECTIONAL to allow TX reuse of the existing DMA mapping.
Signed-off-by: Nicolai Buchwitz <nb@tipi-net.de>
---
.../net/ethernet/broadcom/genet/bcmgenet.c | 231 ++++++++++++++++--
.../net/ethernet/broadcom/genet/bcmgenet.h | 3 +
2 files changed, 211 insertions(+), 23 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index ee1d4ecc2b87..f1e515526787 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -49,8 +49,10 @@
#define GENET_Q0_RX_BD_CNT \
(TOTAL_DESC - priv->hw_params->rx_queues * priv->hw_params->rx_bds_per_q)
+#define GENET_Q16_TX_BD_CNT 32
#define GENET_Q0_TX_BD_CNT \
- (TOTAL_DESC - priv->hw_params->tx_queues * priv->hw_params->tx_bds_per_q)
+ (TOTAL_DESC - priv->hw_params->tx_queues * priv->hw_params->tx_bds_per_q \
+ - GENET_Q16_TX_BD_CNT)
#define RX_BUF_LENGTH 2048
#define SKB_ALIGNMENT 32
@@ -1893,6 +1895,14 @@ static struct sk_buff *bcmgenet_free_tx_cb(struct device *dev,
if (cb == GENET_CB(skb)->last_cb)
return skb;
+ } else if (cb->xdpf) {
+ if (cb->xdp_dma_map)
+ dma_unmap_single(dev, dma_unmap_addr(cb, dma_addr),
+ dma_unmap_len(cb, dma_len),
+ DMA_TO_DEVICE);
+ dma_unmap_addr_set(cb, dma_addr, 0);
+ xdp_return_frame(cb->xdpf);
+ cb->xdpf = NULL;
} else if (dma_unmap_addr(cb, dma_addr)) {
dma_unmap_page(dev,
dma_unmap_addr(cb, dma_addr),
@@ -1925,10 +1935,16 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
unsigned int pkts_compl = 0;
unsigned int txbds_ready;
unsigned int c_index;
+ struct enet_cb *tx_cb;
struct sk_buff *skb;
- /* Clear status before servicing to reduce spurious interrupts */
- bcmgenet_intrl2_1_writel(priv, (1 << ring->index), INTRL2_CPU_CLEAR);
+ /* Clear status before servicing to reduce spurious interrupts.
+ * Ring DESC_INDEX (XDP TX) has no interrupt; skip the clear to
+ * avoid clobbering RX ring 0's bit at the same position.
+ */
+ if (ring->index != DESC_INDEX)
+ bcmgenet_intrl2_1_writel(priv, BIT(ring->index),
+ INTRL2_CPU_CLEAR);
/* Compute how many buffers are transmitted since last xmit call */
c_index = bcmgenet_tdma_ring_readl(priv, ring->index, TDMA_CONS_INDEX)
@@ -1941,8 +1957,15 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
/* Reclaim transmitted buffers */
while (txbds_processed < txbds_ready) {
- skb = bcmgenet_free_tx_cb(&priv->pdev->dev,
- &priv->tx_cbs[ring->clean_ptr]);
+ tx_cb = &priv->tx_cbs[ring->clean_ptr];
+ if (tx_cb->xdpf) {
+ pkts_compl++;
+ bytes_compl += tx_cb->xdp_dma_map
+ ? tx_cb->xdpf->len
+ : tx_cb->xdpf->len -
+ sizeof(struct status_64);
+ }
+ skb = bcmgenet_free_tx_cb(&priv->pdev->dev, tx_cb);
if (skb) {
pkts_compl++;
bytes_compl += GENET_CB(skb)->bytes_sent;
@@ -1964,8 +1987,11 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
u64_stats_add(&stats->bytes, bytes_compl);
u64_stats_update_end(&stats->syncp);
- netdev_tx_completed_queue(netdev_get_tx_queue(dev, ring->index),
- pkts_compl, bytes_compl);
+ /* Ring DESC_INDEX (XDP TX) has no netdev TX queue; skip BQL */
+ if (ring->index != DESC_INDEX)
+ netdev_tx_completed_queue(netdev_get_tx_queue(dev,
+ ring->index),
+ pkts_compl, bytes_compl);
return txbds_processed;
}
@@ -1999,7 +2025,10 @@ static unsigned int bcmgenet_tx_reclaim(struct net_device *dev,
}
if (skb)
dev_consume_skb_any(skb);
- netdev_tx_reset_queue(netdev_get_tx_queue(dev, ring->index));
+ /* Ring DESC_INDEX (XDP TX) has no netdev TX queue; skip BQL */
+ if (ring->index != DESC_INDEX)
+ netdev_tx_reset_queue(netdev_get_tx_queue(dev,
+ ring->index));
bcmgenet_tdma_ring_writel(priv, ring->index,
ring->prod_index, TDMA_PROD_INDEX);
wr_ptr = ring->write_ptr * WORDS_PER_BD(priv);
@@ -2044,6 +2073,9 @@ static void bcmgenet_tx_reclaim_all(struct net_device *dev)
do {
bcmgenet_tx_reclaim(dev, &priv->tx_rings[i++], true);
} while (i <= priv->hw_params->tx_queues && netif_is_multiqueue(dev));
+
+ /* Also reclaim XDP TX ring */
+ bcmgenet_tx_reclaim(dev, &priv->xdp_tx_ring, true);
}
/* Reallocate the SKB to put enough headroom in front of it and insert
@@ -2300,11 +2332,96 @@ static struct sk_buff *bcmgenet_xdp_build_skb(struct bcmgenet_rx_ring *ring,
return skb;
}
+static bool bcmgenet_xdp_xmit_frame(struct bcmgenet_priv *priv,
+ struct xdp_frame *xdpf, bool dma_map)
+{
+ struct bcmgenet_tx_ring *ring = &priv->xdp_tx_ring;
+ struct device *kdev = &priv->pdev->dev;
+ struct enet_cb *tx_cb_ptr;
+ dma_addr_t mapping;
+ unsigned int dma_len;
+ u32 len_stat;
+
+ spin_lock(&ring->lock);
+
+ if (ring->free_bds < 1) {
+ spin_unlock(&ring->lock);
+ return false;
+ }
+
+ tx_cb_ptr = bcmgenet_get_txcb(priv, ring);
+
+ if (dma_map) {
+ void *tsb_start;
+
+ /* The GENET MAC has TBUF_64B_EN set globally, so hardware
+ * expects a 64-byte TSB prefix on every TX buffer. For
+ * redirected frames (ndo_xdp_xmit) we prepend a zeroed TSB
+ * using the frame's headroom.
+ */
+ if (unlikely(xdpf->headroom < sizeof(struct status_64))) {
+ bcmgenet_put_txcb(priv, ring);
+ spin_unlock(&ring->lock);
+ return false;
+ }
+
+ tsb_start = xdpf->data - sizeof(struct status_64);
+ memset(tsb_start, 0, sizeof(struct status_64));
+
+ dma_len = xdpf->len + sizeof(struct status_64);
+ mapping = dma_map_single(kdev, tsb_start, dma_len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(kdev, mapping)) {
+ tx_cb_ptr->skb = NULL;
+ tx_cb_ptr->xdpf = NULL;
+ bcmgenet_put_txcb(priv, ring);
+ spin_unlock(&ring->lock);
+ return false;
+ }
+ } else {
+ struct page *page = virt_to_page(xdpf->data);
+
+ /* For local XDP_TX the caller already prepended the TSB
+ * into xdpf->data/len, so dma_len == xdpf->len.
+ */
+ dma_len = xdpf->len;
+ mapping = page_pool_get_dma_addr(page) +
+ sizeof(*xdpf) + xdpf->headroom;
+ dma_sync_single_for_device(kdev, mapping, dma_len,
+ DMA_BIDIRECTIONAL);
+ }
+
+ dma_unmap_addr_set(tx_cb_ptr, dma_addr, mapping);
+ dma_unmap_len_set(tx_cb_ptr, dma_len, dma_len);
+ tx_cb_ptr->skb = NULL;
+ tx_cb_ptr->xdpf = xdpf;
+ tx_cb_ptr->xdp_dma_map = dma_map;
+
+ len_stat = (dma_len << DMA_BUFLENGTH_SHIFT) |
+ (priv->hw_params->qtag_mask << DMA_TX_QTAG_SHIFT) |
+ DMA_TX_APPEND_CRC | DMA_SOP | DMA_EOP;
+
+ dmadesc_set(priv, tx_cb_ptr->bd_addr, mapping, len_stat);
+
+ ring->free_bds--;
+ ring->prod_index++;
+ ring->prod_index &= DMA_P_INDEX_MASK;
+
+ bcmgenet_tdma_ring_writel(priv, ring->index, ring->prod_index,
+ TDMA_PROD_INDEX);
+
+ spin_unlock(&ring->lock);
+
+ return true;
+}
+
static unsigned int bcmgenet_run_xdp(struct bcmgenet_rx_ring *ring,
struct bpf_prog *prog,
struct xdp_buff *xdp,
struct page *rx_page)
{
+ struct bcmgenet_priv *priv = ring->priv;
+ struct xdp_frame *xdpf;
unsigned int act;
if (!prog)
@@ -2315,14 +2432,42 @@ static unsigned int bcmgenet_run_xdp(struct bcmgenet_rx_ring *ring,
switch (act) {
case XDP_PASS:
return XDP_PASS;
+ case XDP_TX:
+ /* Prepend a zeroed TSB (Transmit Status Block). The GENET
+ * MAC has TBUF_64B_EN set globally, so hardware expects every
+ * TX buffer to begin with a 64-byte struct status_64. Back
+ * up xdp->data into the RSB area (which is no longer needed
+ * after the BPF program ran) and zero it.
+ */
+ if (xdp->data - xdp->data_hard_start <
+ sizeof(struct status_64) + sizeof(struct xdp_frame)) {
+ page_pool_put_full_page(ring->page_pool, rx_page,
+ true);
+ return XDP_DROP;
+ }
+ xdp->data -= sizeof(struct status_64);
+ xdp->data_meta -= sizeof(struct status_64);
+ memset(xdp->data, 0, sizeof(struct status_64));
+
+ xdpf = xdp_convert_buff_to_frame(xdp);
+ if (unlikely(!xdpf)) {
+ page_pool_put_full_page(ring->page_pool, rx_page,
+ true);
+ return XDP_DROP;
+ }
+ if (unlikely(!bcmgenet_xdp_xmit_frame(priv, xdpf, false))) {
+ xdp_return_frame_rx_napi(xdpf);
+ return XDP_DROP;
+ }
+ return XDP_TX;
case XDP_DROP:
page_pool_put_full_page(ring->page_pool, rx_page, true);
return XDP_DROP;
default:
- bpf_warn_invalid_xdp_action(ring->priv->dev, prog, act);
+ bpf_warn_invalid_xdp_action(priv->dev, prog, act);
fallthrough;
case XDP_ABORTED:
- trace_xdp_exception(ring->priv->dev, prog, act);
+ trace_xdp_exception(priv->dev, prog, act);
page_pool_put_full_page(ring->page_pool, rx_page, true);
return XDP_ABORTED;
}
@@ -2548,9 +2693,15 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
{
struct bcmgenet_rx_ring *ring = container_of(napi,
struct bcmgenet_rx_ring, napi);
+ struct bcmgenet_priv *priv = ring->priv;
struct dim_sample dim_sample = {};
unsigned int work_done;
+ /* Reclaim completed XDP TX frames (ring 16 has no interrupt) */
+ if (priv->xdp_tx_ring.free_bds < priv->xdp_tx_ring.size)
+ bcmgenet_tx_reclaim(priv->dev,
+ &priv->xdp_tx_ring, false);
+
work_done = bcmgenet_desc_rx(ring, budget);
if (work_done < budget && napi_complete_done(napi, work_done))
@@ -2781,10 +2932,11 @@ static void bcmgenet_init_rx_coalesce(struct bcmgenet_rx_ring *ring)
/* Initialize a Tx ring along with corresponding hardware registers */
static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
+ struct bcmgenet_tx_ring *ring,
unsigned int index, unsigned int size,
- unsigned int start_ptr, unsigned int end_ptr)
+ unsigned int start_ptr,
+ unsigned int end_ptr)
{
- struct bcmgenet_tx_ring *ring = &priv->tx_rings[index];
u32 words_per_bd = WORDS_PER_BD(priv);
u32 flow_period_val = 0;
@@ -2825,8 +2977,11 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1,
DMA_END_ADDR);
- /* Initialize Tx NAPI */
- netif_napi_add_tx(priv->dev, &ring->napi, bcmgenet_tx_poll);
+ /* Initialize Tx NAPI for priority queues only; ring DESC_INDEX
+ * (XDP TX) has its completions handled inline in RX NAPI.
+ */
+ if (index != DESC_INDEX)
+ netif_napi_add_tx(priv->dev, &ring->napi, bcmgenet_tx_poll);
}
static int bcmgenet_rx_ring_create_pool(struct bcmgenet_priv *priv,
@@ -2838,7 +2993,7 @@ static int bcmgenet_rx_ring_create_pool(struct bcmgenet_priv *priv,
.pool_size = ring->size,
.nid = NUMA_NO_NODE,
.dev = &priv->pdev->dev,
- .dma_dir = DMA_FROM_DEVICE,
+ .dma_dir = DMA_BIDIRECTIONAL,
.offset = XDP_PACKET_HEADROOM,
.max_len = RX_BUF_LENGTH,
};
@@ -2972,6 +3127,7 @@ static int bcmgenet_tdma_disable(struct bcmgenet_priv *priv)
reg = bcmgenet_tdma_readl(priv, DMA_CTRL);
mask = (1 << (priv->hw_params->tx_queues + 1)) - 1;
+ mask |= BIT(DESC_INDEX);
mask = (mask << DMA_RING_BUF_EN_SHIFT) | DMA_EN;
reg &= ~mask;
bcmgenet_tdma_writel(priv, reg, DMA_CTRL);
@@ -3017,14 +3173,18 @@ static int bcmgenet_rdma_disable(struct bcmgenet_priv *priv)
* with queue 1 being the highest priority queue.
*
* Queue 0 is the default Tx queue with
- * GENET_Q0_TX_BD_CNT = 256 - 4 * 32 = 128 descriptors.
+ * GENET_Q0_TX_BD_CNT = 256 - 4 * 32 - 32 = 96 descriptors.
+ *
+ * Ring 16 (DESC_INDEX) is used for XDP TX with
+ * GENET_Q16_TX_BD_CNT = 32 descriptors.
*
* The transmit control block pool is then partitioned as follows:
- * - Tx queue 0 uses tx_cbs[0..127]
- * - Tx queue 1 uses tx_cbs[128..159]
- * - Tx queue 2 uses tx_cbs[160..191]
- * - Tx queue 3 uses tx_cbs[192..223]
- * - Tx queue 4 uses tx_cbs[224..255]
+ * - Tx queue 0 uses tx_cbs[0..95]
+ * - Tx queue 1 uses tx_cbs[96..127]
+ * - Tx queue 2 uses tx_cbs[128..159]
+ * - Tx queue 3 uses tx_cbs[160..191]
+ * - Tx queue 4 uses tx_cbs[192..223]
+ * - Tx queue 16 uses tx_cbs[224..255]
*/
static void bcmgenet_init_tx_queues(struct net_device *dev)
{
@@ -3037,7 +3197,8 @@ static void bcmgenet_init_tx_queues(struct net_device *dev)
/* Initialize Tx priority queues */
for (i = 0; i <= priv->hw_params->tx_queues; i++) {
- bcmgenet_init_tx_ring(priv, i, end - start, start, end);
+ bcmgenet_init_tx_ring(priv, &priv->tx_rings[i],
+ i, end - start, start, end);
start = end;
end += priv->hw_params->tx_bds_per_q;
dma_priority[DMA_PRIO_REG_INDEX(i)] |=
@@ -3045,13 +3206,21 @@ static void bcmgenet_init_tx_queues(struct net_device *dev)
<< DMA_PRIO_REG_SHIFT(i);
}
+ /* Initialize ring 16 (descriptor ring) for XDP TX */
+ bcmgenet_init_tx_ring(priv, &priv->xdp_tx_ring,
+ DESC_INDEX, GENET_Q16_TX_BD_CNT,
+ TOTAL_DESC - GENET_Q16_TX_BD_CNT, TOTAL_DESC);
+ dma_priority[DMA_PRIO_REG_INDEX(DESC_INDEX)] |=
+ GENET_Q0_PRIORITY << DMA_PRIO_REG_SHIFT(DESC_INDEX);
+
/* Set Tx queue priorities */
bcmgenet_tdma_writel(priv, dma_priority[0], DMA_PRIORITY_0);
bcmgenet_tdma_writel(priv, dma_priority[1], DMA_PRIORITY_1);
bcmgenet_tdma_writel(priv, dma_priority[2], DMA_PRIORITY_2);
- /* Configure Tx queues as descriptor rings */
+ /* Configure Tx queues as descriptor rings, including ring 16 */
ring_mask = (1 << (priv->hw_params->tx_queues + 1)) - 1;
+ ring_mask |= BIT(DESC_INDEX);
bcmgenet_tdma_writel(priv, ring_mask, DMA_RING_CFG);
/* Enable Tx rings */
@@ -3761,6 +3930,21 @@ static void bcmgenet_get_stats64(struct net_device *dev,
stats->tx_dropped += tx_dropped;
}
+ /* Include XDP TX ring (DESC_INDEX) stats */
+ tx_stats = &priv->xdp_tx_ring.stats64;
+ do {
+ start = u64_stats_fetch_begin(&tx_stats->syncp);
+ tx_bytes = u64_stats_read(&tx_stats->bytes);
+ tx_packets = u64_stats_read(&tx_stats->packets);
+ tx_errors = u64_stats_read(&tx_stats->errors);
+ tx_dropped = u64_stats_read(&tx_stats->dropped);
+ } while (u64_stats_fetch_retry(&tx_stats->syncp, start));
+
+ stats->tx_bytes += tx_bytes;
+ stats->tx_packets += tx_packets;
+ stats->tx_errors += tx_errors;
+ stats->tx_dropped += tx_dropped;
+
for (q = 0; q <= priv->hw_params->rx_queues; q++) {
rx_stats = &priv->rx_rings[q].stats64;
do {
@@ -4262,6 +4446,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
u64_stats_init(&priv->rx_rings[i].stats64.syncp);
for (i = 0; i <= priv->hw_params->tx_queues; i++)
u64_stats_init(&priv->tx_rings[i].stats64.syncp);
+ u64_stats_init(&priv->xdp_tx_ring.stats64.syncp);
/* libphy will determine the link state */
netif_carrier_off(dev);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 3d65f0e4b4b4..7e5d9ab0050b 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -472,6 +472,8 @@ struct bcmgenet_rx_stats64 {
struct enet_cb {
struct sk_buff *skb;
+ struct xdp_frame *xdpf;
+ bool xdp_dma_map;
struct page *rx_page;
unsigned int rx_page_offset;
void __iomem *bd_addr;
@@ -611,6 +613,7 @@ struct bcmgenet_priv {
unsigned int num_tx_bds;
struct bcmgenet_tx_ring tx_rings[GENET_MAX_MQ_CNT + 1];
+ struct bcmgenet_tx_ring xdp_tx_ring;
/* receive variables */
void __iomem *rx_bds;
--
2.51.0
^ permalink raw reply related
* [PATCH net-next v9 6/7] net: bcmgenet: add XDP statistics counters
From: Nicolai Buchwitz @ 2026-05-06 9:55 UTC (permalink / raw)
To: netdev
Cc: Justin Chen, Simon Horman, Mohsin Bashir, Doug Berger,
Florian Fainelli, Broadcom internal kernel review list,
Andrew Lunn, Eric Dumazet, Paolo Abeni, Nicolai Buchwitz,
David S. Miller, Jakub Kicinski, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
Stanislav Fomichev, linux-kernel, bpf
In-Reply-To: <20260506095553.55357-1-nb@tipi-net.de>
Expose per-action XDP counters via ethtool -S: xdp_pass, xdp_drop,
xdp_aborted, xdp_tx, xdp_tx_err, xdp_redirect, and xdp_redirect_err.
These use the existing soft MIB infrastructure and are incremented in
bcmgenet_run_xdp() alongside the existing driver statistics.
Signed-off-by: Nicolai Buchwitz <nb@tipi-net.de>
---
drivers/net/ethernet/broadcom/genet/bcmgenet.c | 17 +++++++++++++++++
drivers/net/ethernet/broadcom/genet/bcmgenet.h | 7 +++++++
2 files changed, 24 insertions(+)
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 4e4fe785f0bf..359a297a25e6 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1170,6 +1170,14 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = {
STAT_GENET_SOFT_MIB("tx_realloc_tsb", mib.tx_realloc_tsb),
STAT_GENET_SOFT_MIB("tx_realloc_tsb_failed",
mib.tx_realloc_tsb_failed),
+ /* XDP counters */
+ STAT_GENET_SOFT_MIB("xdp_pass", mib.xdp_pass),
+ STAT_GENET_SOFT_MIB("xdp_drop", mib.xdp_drop),
+ STAT_GENET_SOFT_MIB("xdp_aborted", mib.xdp_aborted),
+ STAT_GENET_SOFT_MIB("xdp_tx", mib.xdp_tx),
+ STAT_GENET_SOFT_MIB("xdp_tx_err", mib.xdp_tx_err),
+ STAT_GENET_SOFT_MIB("xdp_redirect", mib.xdp_redirect),
+ STAT_GENET_SOFT_MIB("xdp_redirect_err", mib.xdp_redirect_err),
/* Per TX queues */
STAT_GENET_Q(0),
STAT_GENET_Q(1),
@@ -2432,6 +2440,7 @@ static unsigned int bcmgenet_run_xdp(struct bcmgenet_rx_ring *ring,
switch (act) {
case XDP_PASS:
+ priv->mib.xdp_pass++;
return XDP_PASS;
case XDP_TX:
/* Prepend a zeroed TSB (Transmit Status Block). The GENET
@@ -2444,6 +2453,7 @@ static unsigned int bcmgenet_run_xdp(struct bcmgenet_rx_ring *ring,
sizeof(struct status_64) + sizeof(struct xdp_frame)) {
page_pool_put_full_page(ring->page_pool, rx_page,
true);
+ priv->mib.xdp_tx_err++;
return XDP_DROP;
}
xdp->data -= sizeof(struct status_64);
@@ -2454,6 +2464,7 @@ static unsigned int bcmgenet_run_xdp(struct bcmgenet_rx_ring *ring,
if (unlikely(!xdpf)) {
page_pool_put_full_page(ring->page_pool, rx_page,
true);
+ priv->mib.xdp_tx_err++;
return XDP_DROP;
}
@@ -2463,19 +2474,24 @@ static unsigned int bcmgenet_run_xdp(struct bcmgenet_rx_ring *ring,
xdpf, false))) {
spin_unlock(&tx_ring->lock);
xdp_return_frame_rx_napi(xdpf);
+ priv->mib.xdp_tx_err++;
return XDP_DROP;
}
bcmgenet_xdp_ring_doorbell(priv, tx_ring);
spin_unlock(&tx_ring->lock);
+ priv->mib.xdp_tx++;
return XDP_TX;
case XDP_REDIRECT:
if (unlikely(xdp_do_redirect(priv->dev, xdp, prog))) {
+ priv->mib.xdp_redirect_err++;
page_pool_put_full_page(ring->page_pool, rx_page,
true);
return XDP_DROP;
}
+ priv->mib.xdp_redirect++;
return XDP_REDIRECT;
case XDP_DROP:
+ priv->mib.xdp_drop++;
page_pool_put_full_page(ring->page_pool, rx_page, true);
return XDP_DROP;
default:
@@ -2483,6 +2499,7 @@ static unsigned int bcmgenet_run_xdp(struct bcmgenet_rx_ring *ring,
fallthrough;
case XDP_ABORTED:
trace_xdp_exception(priv->dev, prog, act);
+ priv->mib.xdp_aborted++;
page_pool_put_full_page(ring->page_pool, rx_page, true);
return XDP_ABORTED;
}
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 7e5d9ab0050b..e5e775a53f6d 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -156,6 +156,13 @@ struct bcmgenet_mib_counters {
u32 tx_dma_failed;
u32 tx_realloc_tsb;
u32 tx_realloc_tsb_failed;
+ u32 xdp_pass;
+ u32 xdp_drop;
+ u32 xdp_aborted;
+ u32 xdp_tx;
+ u32 xdp_tx_err;
+ u32 xdp_redirect;
+ u32 xdp_redirect_err;
};
struct bcmgenet_tx_stats64 {
--
2.51.0
^ permalink raw reply related
* [PATCH net-next v9 5/7] net: bcmgenet: add XDP_REDIRECT and ndo_xdp_xmit support
From: Nicolai Buchwitz @ 2026-05-06 9:55 UTC (permalink / raw)
To: netdev
Cc: Justin Chen, Simon Horman, Mohsin Bashir, Doug Berger,
Florian Fainelli, Broadcom internal kernel review list,
Andrew Lunn, Eric Dumazet, Paolo Abeni, Nicolai Buchwitz,
David S. Miller, Jakub Kicinski, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
Stanislav Fomichev, linux-kernel, bpf
In-Reply-To: <20260506095553.55357-1-nb@tipi-net.de>
Add XDP_REDIRECT support and implement ndo_xdp_xmit so that this device
can transmit frames redirected to it from other devices.
XDP_REDIRECT calls xdp_do_redirect() in the RX path with
xdp_do_flush() once per NAPI poll cycle. ndo_xdp_xmit batches frames
into ring 16 under a single spinlock acquisition.
Advertise NETDEV_XDP_ACT_REDIRECT and NETDEV_XDP_ACT_NDO_XMIT in
xdp_features.
Signed-off-by: Nicolai Buchwitz <nb@tipi-net.de>
---
.../net/ethernet/broadcom/genet/bcmgenet.c | 81 +++++++++++++++----
1 file changed, 67 insertions(+), 14 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index f1e515526787..4e4fe785f0bf 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2332,22 +2332,22 @@ static struct sk_buff *bcmgenet_xdp_build_skb(struct bcmgenet_rx_ring *ring,
return skb;
}
+/* Submit a single XDP frame to the TX ring. Caller must hold ring->lock.
+ * Returns true on success. Does not ring the doorbell - caller must
+ * write TDMA_PROD_INDEX after batching.
+ */
static bool bcmgenet_xdp_xmit_frame(struct bcmgenet_priv *priv,
+ struct bcmgenet_tx_ring *ring,
struct xdp_frame *xdpf, bool dma_map)
{
- struct bcmgenet_tx_ring *ring = &priv->xdp_tx_ring;
struct device *kdev = &priv->pdev->dev;
struct enet_cb *tx_cb_ptr;
dma_addr_t mapping;
unsigned int dma_len;
u32 len_stat;
- spin_lock(&ring->lock);
-
- if (ring->free_bds < 1) {
- spin_unlock(&ring->lock);
+ if (ring->free_bds < 1)
return false;
- }
tx_cb_ptr = bcmgenet_get_txcb(priv, ring);
@@ -2361,7 +2361,6 @@ static bool bcmgenet_xdp_xmit_frame(struct bcmgenet_priv *priv,
*/
if (unlikely(xdpf->headroom < sizeof(struct status_64))) {
bcmgenet_put_txcb(priv, ring);
- spin_unlock(&ring->lock);
return false;
}
@@ -2375,7 +2374,6 @@ static bool bcmgenet_xdp_xmit_frame(struct bcmgenet_priv *priv,
tx_cb_ptr->skb = NULL;
tx_cb_ptr->xdpf = NULL;
bcmgenet_put_txcb(priv, ring);
- spin_unlock(&ring->lock);
return false;
}
} else {
@@ -2407,12 +2405,14 @@ static bool bcmgenet_xdp_xmit_frame(struct bcmgenet_priv *priv,
ring->prod_index++;
ring->prod_index &= DMA_P_INDEX_MASK;
+ return true;
+}
+
+static void bcmgenet_xdp_ring_doorbell(struct bcmgenet_priv *priv,
+ struct bcmgenet_tx_ring *ring)
+{
bcmgenet_tdma_ring_writel(priv, ring->index, ring->prod_index,
TDMA_PROD_INDEX);
-
- spin_unlock(&ring->lock);
-
- return true;
}
static unsigned int bcmgenet_run_xdp(struct bcmgenet_rx_ring *ring,
@@ -2421,6 +2421,7 @@ static unsigned int bcmgenet_run_xdp(struct bcmgenet_rx_ring *ring,
struct page *rx_page)
{
struct bcmgenet_priv *priv = ring->priv;
+ struct bcmgenet_tx_ring *tx_ring;
struct xdp_frame *xdpf;
unsigned int act;
@@ -2455,11 +2456,25 @@ static unsigned int bcmgenet_run_xdp(struct bcmgenet_rx_ring *ring,
true);
return XDP_DROP;
}
- if (unlikely(!bcmgenet_xdp_xmit_frame(priv, xdpf, false))) {
+
+ tx_ring = &priv->xdp_tx_ring;
+ spin_lock(&tx_ring->lock);
+ if (unlikely(!bcmgenet_xdp_xmit_frame(priv, tx_ring,
+ xdpf, false))) {
+ spin_unlock(&tx_ring->lock);
xdp_return_frame_rx_napi(xdpf);
return XDP_DROP;
}
+ bcmgenet_xdp_ring_doorbell(priv, tx_ring);
+ spin_unlock(&tx_ring->lock);
return XDP_TX;
+ case XDP_REDIRECT:
+ if (unlikely(xdp_do_redirect(priv->dev, xdp, prog))) {
+ page_pool_put_full_page(ring->page_pool, rx_page,
+ true);
+ return XDP_DROP;
+ }
+ return XDP_REDIRECT;
case XDP_DROP:
page_pool_put_full_page(ring->page_pool, rx_page, true);
return XDP_DROP;
@@ -2483,6 +2498,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
struct bcmgenet_priv *priv = ring->priv;
struct net_device *dev = priv->dev;
struct bpf_prog *xdp_prog;
+ bool xdp_flush = false;
struct enet_cb *cb;
struct sk_buff *skb;
u32 dma_length_status;
@@ -2631,6 +2647,8 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
GENET_RX_HEADROOM, pkt_len, true);
xdp_act = bcmgenet_run_xdp(ring, xdp_prog, &xdp, rx_page);
+ if (xdp_act == XDP_REDIRECT)
+ xdp_flush = true;
if (xdp_act != XDP_PASS)
goto next;
@@ -2682,6 +2700,9 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
bcmgenet_rdma_ring_writel(priv, ring->index, ring->c_index, RDMA_CONS_INDEX);
}
+ if (xdp_flush)
+ xdp_do_flush();
+
ring->dim.bytes = bytes_processed;
ring->dim.packets = rxpktprocessed;
@@ -4027,6 +4048,36 @@ static int bcmgenet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
}
+static int bcmgenet_xdp_xmit(struct net_device *dev, int num_frames,
+ struct xdp_frame **frames, u32 flags)
+{
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+ struct bcmgenet_tx_ring *ring = &priv->xdp_tx_ring;
+ int sent = 0;
+ int i;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ if (unlikely(!netif_running(dev)))
+ return -ENETDOWN;
+
+ spin_lock(&ring->lock);
+
+ for (i = 0; i < num_frames; i++) {
+ if (!bcmgenet_xdp_xmit_frame(priv, ring, frames[i], true))
+ break;
+ sent++;
+ }
+
+ if (sent)
+ bcmgenet_xdp_ring_doorbell(priv, ring);
+
+ spin_unlock(&ring->lock);
+
+ return sent;
+}
+
static const struct net_device_ops bcmgenet_netdev_ops = {
.ndo_open = bcmgenet_open,
.ndo_stop = bcmgenet_close,
@@ -4039,6 +4090,7 @@ static const struct net_device_ops bcmgenet_netdev_ops = {
.ndo_get_stats64 = bcmgenet_get_stats64,
.ndo_change_carrier = bcmgenet_change_carrier,
.ndo_bpf = bcmgenet_xdp,
+ .ndo_xdp_xmit = bcmgenet_xdp_xmit,
};
/* GENET hardware parameters/characteristics */
@@ -4341,7 +4393,8 @@ static int bcmgenet_probe(struct platform_device *pdev)
NETIF_F_RXCSUM;
dev->hw_features |= dev->features;
dev->vlan_features |= dev->features;
- dev->xdp_features = NETDEV_XDP_ACT_BASIC;
+ dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_NDO_XMIT;
netdev_sw_irq_coalesce_default_on(dev);
--
2.51.0
^ permalink raw reply related
* [PATCH net-next v9 7/7] net: bcmgenet: reject MTU changes incompatible with XDP
From: Nicolai Buchwitz @ 2026-05-06 9:55 UTC (permalink / raw)
To: netdev
Cc: Justin Chen, Simon Horman, Mohsin Bashir, Doug Berger,
Florian Fainelli, Broadcom internal kernel review list,
Andrew Lunn, Eric Dumazet, Paolo Abeni, Nicolai Buchwitz,
Mohsin Bashir, David S. Miller, Jakub Kicinski,
Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
John Fastabend, Stanislav Fomichev, linux-kernel, bpf
In-Reply-To: <20260506095553.55357-1-nb@tipi-net.de>
Add a minimal ndo_change_mtu that rejects MTU values too large for
single-page XDP buffers when an XDP program is attached. Without this,
users could change the MTU at runtime and break the XDP buffer layout.
When no XDP program is attached, any MTU change is accepted, matching
the existing behavior without ndo_change_mtu.
Signed-off-by: Nicolai Buchwitz <nb@tipi-net.de>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Reviewed-by: Mohsin Bashir <hmohsin@meta.com>
---
drivers/net/ethernet/broadcom/genet/bcmgenet.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 359a297a25e6..6ef76e93d9ad 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -4095,6 +4095,20 @@ static int bcmgenet_xdp_xmit(struct net_device *dev, int num_frames,
return sent;
}
+static int bcmgenet_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+
+ if (priv->xdp_prog && new_mtu > PAGE_SIZE - GENET_RX_HEADROOM -
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) {
+ netdev_warn(dev, "MTU too large for single-page XDP buffer\n");
+ return -EINVAL;
+ }
+
+ WRITE_ONCE(dev->mtu, new_mtu);
+ return 0;
+}
+
static const struct net_device_ops bcmgenet_netdev_ops = {
.ndo_open = bcmgenet_open,
.ndo_stop = bcmgenet_close,
@@ -4105,6 +4119,7 @@ static const struct net_device_ops bcmgenet_netdev_ops = {
.ndo_eth_ioctl = phy_do_ioctl_running,
.ndo_set_features = bcmgenet_set_features,
.ndo_get_stats64 = bcmgenet_get_stats64,
+ .ndo_change_mtu = bcmgenet_change_mtu,
.ndo_change_carrier = bcmgenet_change_carrier,
.ndo_bpf = bcmgenet_xdp,
.ndo_xdp_xmit = bcmgenet_xdp_xmit,
--
2.51.0
^ permalink raw reply related
* [PATCH] macsec: defer RX SA cleanup from RCU callback to workqueue
From: alexjlzheng @ 2026-05-06 10:01 UTC (permalink / raw)
To: sd, andrew+netdev, davem, edumazet, kuba, pabeni, horms,
shenyangyang4
Cc: netdev, linux-kernel, alexjlzheng
From: Jinliang Zheng <alexjlzheng@tencent.com>
crypto_free_aead() can call vunmap() internally (e.g. via
dma_free_attrs() in hardware crypto drivers like hisi_sec2), which
must not be called from softirq context.
free_rxsa() is an RCU callback and therefore runs in softirq context,
causing a kernel crash when the underlying AEAD implementation
performs DMA unmapping during tfm destruction:
vunmap+0x4c/0x70
__iommu_dma_free+0xd0/0x138
dma_free_attrs+0xf4/0x100
sec_aead_exit+0x64/0xb8 [hisi_sec2]
crypto_destroy_tfm+0x98/0x110
free_rxsa+0x28/0x50 [macsec]
rcu_do_batch+0x184/0x460
rcu_core+0xf4/0x1f8
handle_softirqs+0x118/0x330
Fix this by splitting free_rxsa() into two parts: the RCU callback
now only schedules a work item, and the actual resource release
(crypto_free_aead, free_percpu, kfree) is done in a workqueue
handler running in process context.
Add a destroy_work field to struct macsec_rx_sa and initialize it
in init_rx_sa().
Signed-off-by: Jinliang Zheng <alexjlzheng@tencent.com>
---
drivers/net/macsec.c | 13 +++++++++++--
include/net/macsec.h | 2 ++
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index f6cad0746a02..dabd3d2598ae 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -174,15 +174,23 @@ static void macsec_rxsc_put(struct macsec_rx_sc *sc)
call_rcu(&sc->rcu_head, free_rx_sc_rcu);
}
-static void free_rxsa(struct rcu_head *head)
+static void free_rxsa_work(struct work_struct *work)
{
- struct macsec_rx_sa *sa = container_of(head, struct macsec_rx_sa, rcu);
+ struct macsec_rx_sa *sa = container_of(work, struct macsec_rx_sa,
+ destroy_work);
crypto_free_aead(sa->key.tfm);
free_percpu(sa->stats);
kfree(sa);
}
+static void free_rxsa(struct rcu_head *head)
+{
+ struct macsec_rx_sa *sa = container_of(head, struct macsec_rx_sa, rcu);
+
+ schedule_work(&sa->destroy_work);
+}
+
static void macsec_rxsa_put(struct macsec_rx_sa *sa)
{
if (refcount_dec_and_test(&sa->refcnt))
@@ -1407,6 +1415,7 @@ static int init_rx_sa(struct macsec_rx_sa *rx_sa, char *sak, int key_len,
rx_sa->next_pn = 1;
refcount_set(&rx_sa->refcnt, 1);
spin_lock_init(&rx_sa->lock);
+ INIT_WORK(&rx_sa->destroy_work, free_rxsa_work);
return 0;
}
diff --git a/include/net/macsec.h b/include/net/macsec.h
index bc7de5b53e54..aeacd361f686 100644
--- a/include/net/macsec.h
+++ b/include/net/macsec.h
@@ -9,6 +9,7 @@
#include <linux/u64_stats_sync.h>
#include <linux/if_vlan.h>
+#include <linux/workqueue.h>
#include <uapi/linux/if_link.h>
#include <uapi/linux/if_macsec.h>
@@ -137,6 +138,7 @@ struct macsec_rx_sa {
struct macsec_rx_sa_stats __percpu *stats;
struct macsec_rx_sc *sc;
struct rcu_head rcu;
+ struct work_struct destroy_work;
};
struct pcpu_rx_sc_stats {
--
2.39.3
^ permalink raw reply related
* Re: [PATCH net-next 5/5] ionic: Add .get_fec_stats ethtool handler
From: Vadim Fedorenko @ 2026-05-06 10:00 UTC (permalink / raw)
To: Eric Joyner, netdev
Cc: Brett Creeley, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni
In-Reply-To: <e5d3c314-e6b0-4f18-b17e-752fea4dcb16@amd.com>
On 05/05/2026 20:43, Eric Joyner wrote:
> On 5/5/2026 6:54 AM, Vadim Fedorenko wrote:
>> [You don't often get email from vadim.fedorenko@linux.dev. Learn why this is
>> important at https://aka.ms/LearnAboutSenderIdentification ]
>>
>> Caution: This message originated from an External Source. Use proper caution
>> when opening attachments, clicking links, or responding.
>>
>>
>> On 01/05/2026 04:15, Eric Joyner wrote:
>>> Several FEC error statistics being collected can be reported in a
>>> dedicated ethtool callback for FEC errors, so implement the handler that
>>> does so. This includes 802.3ck FEC histogram data that some newer
>>> hardware collects.
>>>
>>> Assisted-by: Claude:claude-4.6-sonnet
>>> Signed-off-by: Eric Joyner <eric.joyner@amd.com>
>>> ---
>>> .../ethernet/pensando/ionic/ionic_ethtool.c | 51 +++++++++++++++++++
>>> 1 file changed, 51 insertions(+)
>>>
>>> diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/
>>> net/ethernet/pensando/ionic/ionic_ethtool.c
>>> index 78a802eb159f..fe1f753b6115 100644
>>> --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
>>> +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
>>> @@ -418,6 +418,56 @@ static int ionic_get_fecparam(struct net_device *netdev,
>>> return 0;
>>> }
>>>
>>> +static const struct ethtool_fec_hist_range ionic_fec_ranges[] = {
>>> + { 0, 0},
>>> + { 1, 1},
>>> + { 2, 2},
>>> + { 3, 3},
>>> + { 4, 4},
>>> + { 5, 5},
>>> + { 6, 6},
>>> + { 7, 7},
>>> + { 8, 8},
>>> + { 9, 9},
>>> + { 10, 10},
>>> + { 11, 11},
>>> + { 12, 12},
>>> + { 13, 13},
>>> + { 14, 14},
>>> + { 15, 15},
>>> + { 0, 0},
>>> +};
>>> +
>>> +static void
>>> +ionic_fill_fec_hist(const struct ionic_port_extra_stats *extra_stats,
>>> + struct ethtool_fec_hist *hist)
>>> +{
>>> + int i;
>>> +
>>> + hist->ranges = ionic_fec_ranges;
>>> + for (i = 0; i < ETHTOOL_FEC_HIST_MAX - 1; i++)
>>> + hist->values[i].sum = extra_stats->fec_codeword_error_bin[i];
>>> +}
>>
>> ETHTOOL_FEC_HIST_MAX = 17, you defined 16 bins, but iterating over 15 of
>> them. Looks like bin {15, 15} will be lost in stats.
>>
>>
>
> This looks correct to me -- (ETHTOOL_FEC_HIST_MAX - 1) = 16, so starting with
> i=0, it'll iterate through the 16 bins and ignore the 17th end marker bin. Bin
> 15 does get included and gets its sum set.
Ahh, yeah, you're right, something was wrong with my math yesterday. BTW, how
does it work for different FEC modes? RS(528, 514) will not give you 16 bins.
^ permalink raw reply
* [PATCH net] net: team: fix NULL pointer dereference in team_xmit during mode change
From: Weiming Shi @ 2026-05-06 10:11 UTC (permalink / raw)
To: Jiri Pirko, Andrew Lunn, David S . Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni
Cc: netdev, Xiang Mei, Weiming Shi
__team_change_mode() clears team->ops with memset() before restoring
safe dummy handlers via team_adjust_ops(). A concurrent team_xmit()
running under RCU on another CPU can read team->ops.transmit during
this window and call a NULL function pointer, crashing the kernel.
The race requires CAP_NET_ADMIN (in init_user_ns) to trigger via
TEAM_CMD_OPTIONS_SET, plus AF_PACKET sendto() on a team device with
forced carrier and no ports.
BUG: kernel NULL pointer dereference, address: 0000000000000000
Oops: 0010 [#1] SMP KASAN NOPTI
RIP: 0010:0x0
Call Trace:
team_xmit (drivers/net/team/team_core.c:1853)
dev_hard_start_xmit (net/core/dev.c:3904)
__dev_queue_xmit (net/core/dev.c:4871)
packet_sendmsg (net/packet/af_packet.c:3109)
__sys_sendto (net/socket.c:2265)
Fix this by reading team->ops.transmit with READ_ONCE() into a local
variable and falling back to team_dummy_transmit if NULL. This matches
what team_adjust_ops() would have installed moments later.
Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
---
drivers/net/team/team_core.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index 0c87f9972457..3ff08b5deccd 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -1844,8 +1844,14 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev)
unsigned int len = skb->len;
tx_success = team_queue_override_transmit(team, skb);
- if (!tx_success)
- tx_success = team->ops.transmit(team, skb);
+ if (!tx_success) {
+ bool (*transmit)(struct team *team, struct sk_buff *skb);
+
+ transmit = READ_ONCE(team->ops.transmit);
+ if (unlikely(!transmit))
+ transmit = team_dummy_transmit;
+ tx_success = transmit(team, skb);
+ }
if (tx_success) {
struct team_pcpu_stats *pcpu_stats;
--
2.43.0
^ permalink raw reply related
* Re: [PATCH v1 net] ipmr: Call ipmr_fib_lookup() under RCU.
From: Eric Dumazet @ 2026-05-06 10:13 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David Ahern, Ido Schimmel, David S. Miller, Jakub Kicinski,
Paolo Abeni, Simon Horman, Kuniyuki Iwashima, netdev, syzkaller,
Yi Lai
In-Reply-To: <20260506065955.1695753-1-kuniyu@google.com>
On Tue, May 5, 2026 at 11:59 PM Kuniyuki Iwashima <kuniyu@google.com> wrote:
>
> Yi Lai reported RCU splat in reg_vif_xmit() below. [0]
>
> When CONFIG_IP_MROUTE_MULTIPLE_TABLES=n, ipmr_fib_lookup()
> uses rcu_dereference() without explicit rcu_read_lock().
>
> Although rcu_read_lock_bh() is already held by the caller
> __dev_queue_xmit(), lockdep requires explicit rcu_read_lock()
> for rcu_dereference().
>
> Let's move up rcu_read_lock() in reg_vif_xmit() to
> cover ipmr_fib_lookup().
>
>
>
> Fixes: b3b6babf4751 ("ipmr: Free mr_table after RCU grace period.")
> Reported-by: syzkaller <syzkaller@googlegroups.com>
> Reported-by: Yi Lai <yi1.lai@intel.com>
> Closes: https://lore.kernel.org/netdev/afrY34dLXNUboevf@ly-workstation/
> Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply
* Re: [PATCH v1 net] tcp: Fix dst leak in tcp_v6_connect().
From: Eric Dumazet @ 2026-05-06 10:16 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: Neal Cardwell, David S. Miller, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, netdev, Damiano Melotti
In-Reply-To: <20260506070443.1699879-1-kuniyu@google.com>
On Wed, May 6, 2026 at 12:04 AM Kuniyuki Iwashima <kuniyu@google.com> wrote:
>
> If a socket is bound to a wildcard address, tcp_v[46]_connect()
> updates it with a non-wildcard address based on the route lookup.
>
> After bhash2 was introduced in the cited commit, we must call
> inet_bhash2_update_saddr() to update the bhash2 entry as well.
>
> If inet_bhash2_update_saddr() fails, we must release the refcount
> for dst by ip_route_connect() or ip6_dst_lookup_flow().
>
> While tcp_v4_connect() calls ip_rt_put() in the error path,
> tcp_v6_connect() does not call dst_release().
>
> Let's call dst_release() when inet_bhash2_update_saddr() fails
> in tcp_v6_connect().
>
> Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address")
> Reported-by: Damiano Melotti <melotti@google.com>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply
* Re: [PATCH net-next v3 1/2] tcp: rehash onto different local ECMP path on retransmit timeout
From: Eric Dumazet @ 2026-05-06 10:26 UTC (permalink / raw)
To: Neil Spring; +Cc: netdev, davem, kuba, pabeni, ncardwell, dsahern
In-Reply-To: <20260505193824.2791642-2-ntspring@meta.com>
On Tue, May 5, 2026 at 12:38 PM Neil Spring <ntspring@meta.com> wrote:
>
> Currently sk_rethink_txhash() re-rolls the socket's txhash on RTO, PLB,
> and spurious-retransmission events, but the cached route is reused and
> the new hash is not propagated into the ECMP path selection logic. Two
> changes are needed to make rehash select a different local ECMP path:
>
> 1. Add sk_dst_reset() alongside sk_rethink_txhash() in
> tcp_write_timeout(), tcp_rcv_spurious_retrans(), and
> tcp_plb_check_rehash() so the cached dst is invalidated and the
> next transmit triggers a fresh route lookup.
>
> 2. Set fl6->mp_hash from sk_txhash (or tcp_rsk(req)->txhash for
> SYN/ACK retransmits) in inet6_sk_rebuild_header(),
> inet6_csk_route_req(), and inet6_csk_route_socket() so
> fib6_select_path() picks a path based on the new hash.
>
> It is necessary to update mp_hash explicitly because the
> default ECMP hash derives from fl6->flowlabel via
> np->flow_label, which is not updated from sk_txhash
> (REPFLOW is off by default). ip6_make_flowlabel() cannot
> help either, as it runs after the route lookup.
>
> Signed-off-by: Neil Spring <ntspring@meta.com>
> ---
> net/ipv4/tcp_input.c | 4 +++-
> net/ipv4/tcp_plb.c | 1 +
> net/ipv4/tcp_timer.c | 1 +
> net/ipv6/af_inet6.c | 3 +++
> net/ipv6/inet6_connection_sock.c | 6 ++++++
> 5 files changed, 14 insertions(+), 1 deletion(-)
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 7995a89bafc9..126dffd675c9 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -5020,8 +5020,10 @@ static void tcp_rcv_spurious_retrans(struct sock *sk,
> skb->protocol == htons(ETH_P_IPV6) &&
> (tcp_sk(sk)->inet_conn.icsk_ack.lrcv_flowlabel !=
> ntohl(ip6_flowlabel(ipv6_hdr(skb)))) &&
> - sk_rethink_txhash(sk))
> + sk_rethink_txhash(sk)) {
> NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
> + sk_dst_reset(sk);
I think it would be nice to use __sk_dst_reset() in TCP, to reflect the fact
that we own the socket lock.
^ permalink raw reply
* Re: [PATCH net-next v6 2/2] net: sfp: extend SMBus support
From: Maxime Chevallier @ 2026-05-06 10:27 UTC (permalink / raw)
To: Jonas Jelonek, Russell King, Andrew Lunn, Heiner Kallweit,
David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: netdev, linux-kernel, Bjørn Mork
In-Reply-To: <20260505200647.1125311-3-jelonek.jonas@gmail.com>
Hi Jonas,
On 05/05/2026 22:06, Jonas Jelonek wrote:
> Commit 7662abf4db94 ("net: phy: sfp: Add support for SMBus module access")
> added SMBus access for SFP modules, but limited it to single-byte
> transfers. As a side effect, hwmon is disabled (16-bit reads cannot be
> guaranteed atomic) and a warning is printed.
>
> Many SMBus-only I2C controllers in the wild support more than just
> byte access, and SFP cages are often wired to such controllers
> rather than to a full-featured I2C controller -- e.g. the SMBus
> controllers in the Realtek longan and mango SoCs, which advertise
> word access and I2C block reads. Today, they cannot drive an SFP at
> all without falling back to the byte-only path.
>
> Extend sfp_smbus_read()/sfp_smbus_write() so that, in addition to
> the existing byte access, they also use SMBus word access and SMBus
> I2C block access whenever the adapter advertises them. Both
> directions are handled in a single read and a single write helper
> that pick the largest supported transfer per chunk and fall back as
> needed.
>
> I2C-block is preferred unconditionally when available: the protocol
> carries any length 1..32, so it can serve every chunk -- including
> the 1- and 2-byte tails -- without help from word or byte access.
> Note that this requires I2C_FUNC_SMBUS_I2C_BLOCK, which reads a
> caller-specified number of bytes. This deviates from the official
> SMBus Block Read (length is supplied by the slave) but is widely
> supported by Linux I2C controllers/drivers.
>
> Capability matrix this implementation supports:
>
> - BYTE only: works (unchanged behaviour); 1-byte
> xfers, hwmon disabled.
> - BYTE + WORD: word for >=2-byte chunks, byte for
> trailing odd byte.
> - I2C_BLOCK present (with or
> without BYTE/WORD): block as the universal transport for
> every chunk.
> - WORD only (no BYTE/BLOCK): accepted with WARN_ONCE. Even-length
> transfers work; odd-length transfers
> (e.g. the 3-byte cotsworks fixup
> write) hit the BYTE branch which the
> adapter does not implement, so the
> xfer returns an error and the
> operation is aborted. No mainline
> I2C driver was found to advertise
> WORD without BYTE; the warning lets
> us learn about it if it ever shows
> up.
>
> Adapters with asymmetric R/W capabilities (e.g. only READ_I2C_BLOCK
> but not WRITE_I2C_BLOCK) remain functionally correct -- the
> per-iteration fallback uses the direction-specific bits -- but the
> shared i2c_max_block_size is sized by the all-bits-set check, so a
> transfer in the better-supported direction is not upgraded. None of
> the mainline I2C bus drivers surveyed during review advertise such
> asymmetry; promoting i2c_max_block_size to per-direction sizes can
> be revisited if needed.
>
> Signed-off-by: Jonas Jelonek <jelonek.jonas@gmail.com>
This looks great, I've given this some testing and so far it works well :) I do have some comments though:
> ---
> drivers/net/phy/sfp.c | 134 +++++++++++++++++++++++++++++++++---------
> 1 file changed, 107 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
> index e58e29a1e8d2..8ad650cbe862 100644
> --- a/drivers/net/phy/sfp.c
> +++ b/drivers/net/phy/sfp.c
> @@ -14,6 +14,7 @@
> #include <linux/platform_device.h>
> #include <linux/rtnetlink.h>
> #include <linux/slab.h>
> +#include <linux/unaligned.h>
> #include <linux/workqueue.h>
>
> #include "sfp.h"
> @@ -756,50 +757,110 @@ static int sfp_i2c_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
> return ret == ARRAY_SIZE(msgs) ? len : 0;
> }
>
> -static int sfp_smbus_byte_read(struct sfp *sfp, bool a2, u8 dev_addr,
> - void *buf, size_t len)
> +static int sfp_smbus_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
> + size_t len)
> {
> union i2c_smbus_data smbus_data;
> u8 bus_addr = a2 ? 0x51 : 0x50;
> + size_t this_len, transferred;
> + u32 functionality;
> u8 *data = buf;
> int ret;
>
> - while (len) {
> - ret = i2c_smbus_xfer(sfp->i2c, bus_addr, 0,
> - I2C_SMBUS_READ, dev_addr,
> - I2C_SMBUS_BYTE_DATA, &smbus_data);
> - if (ret < 0)
> - return ret;
> + functionality = i2c_get_functionality(sfp->i2c);
>
> - *data = smbus_data.byte;
> + while (len) {
> + this_len = min(len, sfp->i2c_max_block_size);
You should be using sfp->i2c_block_size here and not i2c_max_block_size
to account for the modules that require a specific access size.
> +
> + if (functionality & I2C_FUNC_SMBUS_READ_I2C_BLOCK) {
> + smbus_data.block[0] = this_len;
> + ret = i2c_smbus_xfer(sfp->i2c, bus_addr, 0,
> + I2C_SMBUS_READ, dev_addr,
> + I2C_SMBUS_I2C_BLOCK_DATA, &smbus_data);
> + if (ret < 0)
> + return ret;
> +
> + memcpy(data, &smbus_data.block[1], this_len);
> + transferred = this_len;
> + } else if (this_len >= 2 &&
> + (functionality & I2C_FUNC_SMBUS_READ_WORD_DATA)) {
> + ret = i2c_smbus_xfer(sfp->i2c, bus_addr, 0,
> + I2C_SMBUS_READ, dev_addr,
> + I2C_SMBUS_WORD_DATA, &smbus_data);
> + if (ret < 0)
> + return ret;
> +
> + put_unaligned_le16(smbus_data.word, data);
> + transferred = 2;
> + } else {
> + ret = i2c_smbus_xfer(sfp->i2c, bus_addr, 0,
> + I2C_SMBUS_READ, dev_addr,
> + I2C_SMBUS_BYTE_DATA, &smbus_data);
> + if (ret < 0)
> + return ret;
> +
> + *data = smbus_data.byte;
> + transferred = 1;
> + }
>
> - len--;
> - data++;
> - dev_addr++;
> + data += transferred;
> + len -= transferred;
> + dev_addr += transferred;
> }
>
> return data - (u8 *)buf;
> }
>
> -static int sfp_smbus_byte_write(struct sfp *sfp, bool a2, u8 dev_addr,
> - void *buf, size_t len)
> +static int sfp_smbus_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
> + size_t len)
> {
> union i2c_smbus_data smbus_data;
> u8 bus_addr = a2 ? 0x51 : 0x50;
> + size_t this_len, transferred;
> + u32 functionality;
> u8 *data = buf;
> int ret;
>
> + functionality = i2c_get_functionality(sfp->i2c);
> +
> while (len) {
> - smbus_data.byte = *data;
> - ret = i2c_smbus_xfer(sfp->i2c, bus_addr, 0,
> - I2C_SMBUS_WRITE, dev_addr,
> - I2C_SMBUS_BYTE_DATA, &smbus_data);
> - if (ret)
> - return ret;
> + this_len = min(len, sfp->i2c_max_block_size);
> +
> + if (functionality & I2C_FUNC_SMBUS_WRITE_I2C_BLOCK) {
> + smbus_data.block[0] = this_len;
> + memcpy(&smbus_data.block[1], data, this_len);
> +
> + ret = i2c_smbus_xfer(sfp->i2c, bus_addr, 0,
> + I2C_SMBUS_WRITE, dev_addr,
> + I2C_SMBUS_I2C_BLOCK_DATA, &smbus_data);
> + if (ret < 0)
> + return ret;
> +
> + transferred = this_len;
> + } else if (this_len >= 2 &&
> + (functionality & I2C_FUNC_SMBUS_WRITE_WORD_DATA)) {
> + smbus_data.word = get_unaligned_le16(data);
> + ret = i2c_smbus_xfer(sfp->i2c, bus_addr, 0,
> + I2C_SMBUS_WRITE, dev_addr,
> + I2C_SMBUS_WORD_DATA, &smbus_data);
> + if (ret < 0)
> + return ret;
> +
> + transferred = 2;
> + } else {
> + smbus_data.byte = *data;
> + ret = i2c_smbus_xfer(sfp->i2c, bus_addr, 0,
> + I2C_SMBUS_WRITE, dev_addr,
> + I2C_SMBUS_BYTE_DATA, &smbus_data);
> + if (ret < 0)
> + return ret;
> +
> + transferred = 1;
> + }
>
> - len--;
> - data++;
> - dev_addr++;
> + data += transferred;
> + len -= transferred;
> + dev_addr += transferred;
> }
>
> return data - (u8 *)buf;
> @@ -815,10 +876,29 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
> sfp->read = sfp_i2c_read;
> sfp->write = sfp_i2c_write;
> max_block_size = SFP_EEPROM_BLOCK_SIZE;
> - } else if (i2c_check_functionality(i2c, I2C_FUNC_SMBUS_BYTE_DATA)) {
> - sfp->read = sfp_smbus_byte_read;
> - sfp->write = sfp_smbus_byte_write;
> - max_block_size = 1;
> + } else if (i2c_check_functionality(i2c, I2C_FUNC_SMBUS_BYTE_DATA) ||
> + i2c_check_functionality(i2c, I2C_FUNC_SMBUS_I2C_BLOCK)) {
> + /* I2C-block carries any length 1..32, byte serves the
> + * 1-byte tail when block is absent: either alone is a
> + * complete transport.
> + */
> + sfp->read = sfp_smbus_read;
> + sfp->write = sfp_smbus_write;
> +
> + if (i2c_check_functionality(i2c, I2C_FUNC_SMBUS_I2C_BLOCK))
> + max_block_size = SFP_EEPROM_BLOCK_SIZE;
> + else if (i2c_check_functionality(i2c, I2C_FUNC_SMBUS_WORD_DATA))
> + max_block_size = 2;
> + else
> + max_block_size = 1;
> + } else if (i2c_check_functionality(i2c, I2C_FUNC_SMBUS_WORD_DATA)) {
> + /* Word-only: even-length xfers work; odd-length xfers
> + * will error out at i2c_smbus_xfer().
> + */
> + WARN_ONCE(1, "sfp: SMBus word-only adapter; odd-length transfers will fail\n");
I think this WARN_ONCE can be moved directly into the
"else if ()" check
> + sfp->read = sfp_smbus_read;
> + sfp->write = sfp_smbus_write;
> + max_block_size = 2;
> } else {
> sfp->i2c = NULL;
> return -EINVAL;
The rest looks good to me :)
Thanks !
Maxime
^ permalink raw reply
* Re: [PATCH v13 5/6] tls: add hardware offload key update support
From: Sabrina Dubroca @ 2026-05-06 10:37 UTC (permalink / raw)
To: Paolo Abeni
Cc: Rishikesh Jethwani, netdev, saeedm, tariqt, mbloch, borisp,
john.fastabend, kuba, davem, edumazet, leon
In-Reply-To: <64d71f18-f86b-4fd7-a6bd-02243eed0492@redhat.com>
2026-05-05, 10:40:41 +0200, Paolo Abeni wrote:
> On 4/29/26 8:10 PM, Rishikesh Jethwani wrote:
> > On TX, the NIC key cannot be replaced while HW-offloaded records
> > are still unacked. tls_dev_start_rekey() installs a temporary SW
> > context with the new key and redirects sendmsg through
> > tls_sw_sendmsg_locked. If no records are pending,
> > tls_dev_complete_rekey() runs inline during setsockopt; otherwise
> > clean_acked sets REKEY_READY once all old-key records are ACKed
> > and the next sendmsg completes the rekey, flushing SW records and
> > reinstalling HW offload at the current write_seq. A KeyUpdate
> > arriving while one is pending re-keys the SW AEAD in place; if the
> > HW reinstall fails the socket stays in SW mode (REKEY_FAILED).
> >
> > On RX, the NIC may have already decrypted in-flight records with
> > the old key before the peer's KeyUpdate is parsed, so the old
> > AEAD, IV and rec_seq are retained on tls_offload_context_rx.
> > tls_check_pending_rekey() invokes tls_device_rx_del_key() to drop
> > the NIC key; otherwise post-KeyUpdate records (carrying new-key
> > wire encryption) would be XOR'd with the retired key.
> > tls_device_decrypted() classifies records by old_nic_boundary:
> >
> > - after the boundary: new-key record; drop the old key.
> > - before, fully encrypted: advance old_rec_seq, let SW AEAD decrypt.
> > - before, (partially) decrypted: reencrypt with the old key so SW
> > AEAD can decrypt with the new key.
> >
> > For mixed records skb->decrypted flags can be wrong (NIC clears
> > them on auth failure); on -EBADMSG, tls_rx_rekey_retry() toggles
> > those flags, decrements old_rec_seq to reuse the nonce, and
> > retries once (gated by old_key_reencrypted).
> >
> > The new key's tls_dev_add is deferred until the old key is fully
> > consumed: tls_set_device_offload_rx() sets dev_add_pending while
> > old_aead_recv is retained, and tls_device_deferred_dev_add()
> > installs the new key once copied_seq crosses old_nic_boundary.
> >
> > Tested on Mellanox ConnectX-6 Dx (Crypto Enabled) with multiple
> > TLS 1.3 TX and RX KeyUpdate cycles.
> >
> > Signed-off-by: Rishikesh Jethwani <rjethwani@purestorage.com>
> > ---
> > include/net/tls.h | 84 +++-
> > include/uapi/linux/snmp.h | 2 +
> > net/tls/tls.h | 29 +-
> > net/tls/tls_device.c | 753 +++++++++++++++++++++++++++++++---
> > net/tls/tls_device_fallback.c | 24 ++
> > net/tls/tls_main.c | 92 +++--
> > net/tls/tls_proc.c | 2 +
> > net/tls/tls_sw.c | 76 +++-
> > net/tls/trace.h | 79 ++++
>
> This patch is really big and complex and you should break it to help
> reviewers.
>
> At very least you can split out the tracing bits and the trivial
> refactor moving around declaration and definitions to separate patches.
True, that would make review of the refactorings easier, and then the
changes for rekey would be more obvious.
Things like tls_sw_ctx_tx_init, adding/using tls_tx_cipher_ctx and
other helpers, function renames, splitting of
tls_set_device_offload*, maybe others.
Based on this, I won't review the whole rekey code this time around (I
think it will be much easier to follow the logic after the split), but
I have some comments that I'll send out today.
--
Sabrina
^ permalink raw reply
* [PATCH net] net: atlantic: preserve PCI wake-from-D3 on shutdown when WOL enabled
From: Rex Bytes @ 2026-05-06 10:42 UTC (permalink / raw)
To: Igor Russkikh, David S . Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: netdev, linux-kernel, stable
The shutdown handler aq_pci_shutdown() unconditionally calls
pci_wake_from_d3(pdev, false), clearing the PCI PME_En bit even when
wake-on-LAN has been configured. While aq_nic_shutdown() correctly
programs the NIC firmware via aq_nic_set_power() to listen for magic
packets, the PCI subsystem will not propagate the resulting PME wake
event from D3, so the system never wakes after poweroff.
WOL from suspend (S3) is unaffected because aq_suspend_common() does
not touch pci_wake_from_d3() and relies on the PM core's wake
configuration via device_may_wakeup().
This affects all atlantic-supported NICs (AQC107/108/111/112/113);
users have reported that WOL works if the atlantic driver is never
loaded, but breaks once it has run its shutdown path.
Pass the configured WOL state to pci_wake_from_d3() instead of a
literal false, so the PCI PME_En bit is preserved when the user has
armed WOL via ethtool.
Fixes: 90869ddfefeb ("net: aquantia: Implement pci shutdown callback")
Cc: stable@vger.kernel.org
Signed-off-by: Rex Bytes <goodboy@rexbytes.com>
---
drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index baa5f8cc31f2..775cbbc1aa42 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -374,7 +374,7 @@ static void aq_pci_shutdown(struct pci_dev *pdev)
pci_disable_device(pdev);
if (system_state == SYSTEM_POWER_OFF) {
- pci_wake_from_d3(pdev, false);
+ pci_wake_from_d3(pdev, self->aq_hw->aq_nic_cfg->wol);
pci_set_power_state(pdev, PCI_D3hot);
}
}
--
2.43.0
^ permalink raw reply related
* [GIT PULL] wireless-2026-05-06
From: Johannes Berg @ 2026-05-06 11:01 UTC (permalink / raw)
To: netdev; +Cc: linux-wireless
Hi,
More content given last week's kunit failure, but I've fixed
that now, sorry about that. Things are all over really.
Please pull and let us know if there's any problem.
Thanks,
johannes
The following changes since commit 254f49634ee16a731174d2ae34bc50bd5f45e731:
Linux 7.1-rc1 (2026-04-26 14:19:00 -0700)
are available in the Git repository at:
https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git tags/wireless-2026-05-06
for you to fetch changes up to 79240f3f6d766b342b57c32397d643e1cfa26b81:
wifi: nl80211: re-check wiphy netns in nl80211_prepare_wdev_dump() continuation (2026-05-06 11:08:41 +0200)
----------------------------------------------------------------
Quite a number of fixes now:
- mac80211
- remove HT NSS validation to work with broken APs
(with a kunit fix now)
- remove 'static' that could cause races
- check station link lookup before further processing
- fix use-after-free due to delete in list iteration
- remove AP station on assoc failures to fix crashes
- ath12k
- fix OF node refcount imbalance
- fix queue flush ("REO update") in MLO
- fix RCU assert
- ath12k:
- fix ath10k Kconfig with POWER_SEQUENCING
- fix WMI buffer leaks on error conditions
- don't use uninitialized stack data when processing RSSI events
- fix logic for determining the peer ID in the RX path
- ath5k: fix a potential stack buffer overwrite
- rsi: fix thread lifetime race
- brcmfmac: fix potential UAF
- nl80211:
- stricter permissions/checks for PMK and netns
- fix netlink policy vs. code type confusion
- cw1200: revert a broken locking change
- various fixes to not trust values from firmware
----------------------------------------------------------------
Aaradhana Sahu (1):
wifi: ath12k: fix OF node refcount imbalance in WSI graph traversal
Amir Mohammad Jahangirzad (1):
wifi: libertas: fix integer underflow in process_cmdrequest()
Baochen Qiang (2):
wifi: ath12k: prepare REO update element only for primary link
wifi: ath12k: fix peer_id usage in normal RX path
Bart Van Assche (1):
wifi: cw1200: Revert "Fix locking in error paths"
Benjamin Berg (1):
wifi: mac80211: use safe list iteration in radar detect work
Catherine (1):
wifi: mac80211: drop stray 'static' from fast-RX rx_result
Dmitry Baryshkov (1):
wifi: ath10k: snoc: select POWER_SEQUENCING
Jakov Novak (1):
wifi: libertas: notify firmware load wait on disconnect
Jeongjun Park (1):
wifi: rsi: fix kthread lifetime race between self-exit and external-stop
Jiri Slaby (SUSE) (1):
wifi: ath5k: do not access array OOB
Johannes Berg (5):
Merge tag 'ath-current-20260427' of git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath
wifi: mac80211: tests: mark HT check strict
Merge tag 'ath-current-20260505' of git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath
wifi: mac80211: remove station if connection prep fails
wifi: nl80211: fix NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST usage
Maoyi Xie (2):
wifi: nl80211: require CAP_NET_ADMIN over the target netns in SET_WIPHY_NETNS
wifi: nl80211: re-check wiphy netns in nl80211_prepare_wdev_dump() continuation
Marek Szyprowski (1):
wifi: brcmfmac: Fix potential use-after-free issue when stopping watchdog task
Michael Bommarito (2):
wifi: nl80211: require admin perm on SET_PMK / DEL_PMK
wifi: mac80211: check ieee80211_rx_data_set_link return in pubsta MLO path
Nicolas Escande (1):
wifi: ath12k: fix leak in some ath12k_wmi_xxx() functions
Rameshkumar Sundaram (1):
wifi: ath12k: initialize RSSI dBm conversion event state
Rio Liu (1):
wifi: mac80211: skip ieee80211_verify_sta_ht_mcs_support check in non-strict mode
Tristan Madani (2):
wifi: b43: enforce bounds check on firmware key index in b43_rx()
wifi: b43legacy: enforce bounds check on firmware key index in RX path
Yu-Hsiang Tseng (1):
wifi: ath12k: use lockdep_assert_in_rcu_read_lock() for RCU assertions
drivers/net/wireless/ath/ath10k/Kconfig | 1 +
drivers/net/wireless/ath/ath12k/core.c | 77 +++++++++------
drivers/net/wireless/ath/ath12k/dp_rx.c | 5 +-
drivers/net/wireless/ath/ath12k/mac.c | 2 +-
drivers/net/wireless/ath/ath12k/p2p.c | 2 +-
drivers/net/wireless/ath/ath12k/wmi.c | 105 +++++++++++++++++----
drivers/net/wireless/ath/ath5k/base.c | 3 +-
drivers/net/wireless/broadcom/b43/xmit.c | 3 +-
drivers/net/wireless/broadcom/b43legacy/xmit.c | 3 +-
.../wireless/broadcom/brcm80211/brcmfmac/sdio.c | 6 +-
drivers/net/wireless/marvell/libertas/if_usb.c | 6 +-
drivers/net/wireless/rsi/rsi_common.h | 5 +-
drivers/net/wireless/st/cw1200/pm.c | 2 -
net/mac80211/mlme.c | 18 +++-
net/mac80211/rx.c | 6 +-
net/mac80211/tests/chan-mode.c | 1 +
net/mac80211/util.c | 4 +-
net/wireless/nl80211.c | 27 ++++++
net/wireless/pmsr.c | 2 +-
19 files changed, 208 insertions(+), 70 deletions(-)
^ permalink raw reply
* Re: [PATCH 1/2] nfc: llcp: Fix use-after-free in llcp_sock_release()
From: David Heidelberg @ 2026-05-06 11:08 UTC (permalink / raw)
To: Lee Jones, Jakub Kicinski
Cc: David Heidelberg, David S. Miller, Eric Dumazet, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, Kees Cook, Junxi Qian,
Ingo Molnar, Samuel Ortiz, netdev, linux-kernel
In-Reply-To: <20260506081145.GA305027@google.com>
Hello Lee.
Yeah, I think today these should hit the linux-next integration tree, and I need to set up the Thank you email to work in `b4 review` :)
David
-------- Original Message --------
From: Lee Jones <lee@kernel.org>
Sent: 6 May 2026 08:11:45 UTC
To: Jakub Kicinski <kuba@kernel.org>
Cc: David Heidelberg <david+nfc@ixit.cz>, "David S. Miller" <davem@davemloft.net>, Eric Dumazet <edumazet@google.com>, Paolo Abeni <pabeni@redhat.com>, Simon Horman <horms@kernel.org>, Kuniyuki Iwashima <kuniyu@google.com>, Kees Cook <kees@kernel.org>, Junxi Qian <qjx1298677004@gmail.com>, Ingo Molnar <mingo@kernel.org>, Samuel Ortiz <sameo@linux.intel.com>, netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH 1/2] nfc: llcp: Fix use-after-free in llcp_sock_release()
On Fri, 01 May 2026, Jakub Kicinski wrote:
> On Wed, 29 Apr 2026 13:40:41 +0000 Lee Jones wrote:
> > llcp_sock_release() unconditionally unlinks the socket from the local
> > sockets list. However, if the socket is still in connecting state, it
> > is on the connecting list.
> >
> > Fix this by checking the socket state and unlinking from the correct list.
> >
> > Fixes: b4011239a08e ("NFC: llcp: Fix non blocking sockets connections")
> > Signed-off-by: Lee Jones <lee@kernel.org>
>
> Adding David H and dropping from netdev's patchwork..
Is anyone looking at these please?
These are pretty important.
^ permalink raw reply
* Re: [PATCH net] ipv6: fix potential UAF caused by ip6_forward_proxy_check()
From: Ido Schimmel @ 2026-05-06 11:10 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
David Ahern, netdev, eric.dumazet, Damiano Melotti
In-Reply-To: <20260505130056.2927197-1-edumazet@google.com>
On Tue, May 05, 2026 at 01:00:56PM +0000, Eric Dumazet wrote:
> ip6_forward_proxy_check() calls pskb_may_pull() which might re-allocate
> skb->head.
>
> Reload ipv6_hdr() after the pskb_may_pull() call to avoid using
> the freed memory.
>
> Fixes: e21e0b5f19ac ("[IPV6] NDISC: Handle NDP messages to proxied addresses.")
> Reported-by: Damiano Melotti <melotti@google.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
^ permalink raw reply
* AW: assert in phylink.c with lan7801 and dp83tc811 since kernel 6.18
From: Sven Schuchmann @ 2026-05-06 11:10 UTC (permalink / raw)
To: Andrew Lunn; +Cc: netdev@vger.kernel.org
In-Reply-To: <57fb63c2-7a05-4bbe-ba2d-fc61ce1e3ba1@lunn.ch>
Hi Andrew
> Von: Andrew Lunn <andrew@lunn.ch>
> On Tue, May 05, 2026 at 09:53:06AM +0000, Sven Schuchmann wrote:
> > Hello,
> > I have a raspberrypi and switched from kernel 6.12 to 6.18 and now I have a crash in phylink.c.
> Please could you try 7.0.3, or better still, 7.1-rcX. We need to
> determine if the problem has already been fixed and just needs
> backporting, or is it a new problem.
Now tried with: Linux rpi 7.1.0-rc2-v8+ #6 SMP PREEMPT Wed May 6 09:07:34 CEST 2026 aarch64 GNU/Linux
> Please could you make line 1 of drivers/net/phy/phylink.c "#define DEBUG".
Also done
Still crashing -> see below for more details
[ 2.153568] usb 1-1.3: new high-speed USB device number 3 using dwc2
[ 2.266726] usb 1-1.3: New USB device found, idVendor=0424, idProduct=7801, bcdDevice= 3.00
[ 2.266762] usb 1-1.3: New USB device strings: Mfr=1, Product=2, SerialNumber=3
[ 2.266772] usb 1-1.3: Product: LAN7801
[ 2.266781] usb 1-1.3: Manufacturer: Microchip
[ 2.266789] usb 1-1.3: SerialNumber: 00800F780100
[ 2.323106] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): int urb period 64
[ 2.334333] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): PHY usb-001:003:00 doesn't supply possible interfaces
[ 2.334360] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): validation of rgmii-id with support 00000000,00000000,00000000,00006280 and advertisement 00000000,00000000,00000000,00006280 failed: -EINVAL
[ 2.339481] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): can't attach PHY to usb-001:003, error -EINVAL
[ 2.339519] ------------[ cut here ]------------
[ 2.339525] RTNL: assertion failed at drivers/net/phy/phylink.c (2351)
[ 2.339613] WARNING: drivers/net/phy/phylink.c:2351 at phylink_disconnect_phy+0xf0/0x100, CPU#0: kworker/0:0/9
[ 2.851713] Modules linked in:
[ 2.854770] CPU: 0 UID: 0 PID: 9 Comm: kworker/0:0 Not tainted 7.1.0-rc2-v8+ #6 PREEMPT
[ 2.862861] Hardware name: Raspberry Pi Compute Module 4 Rev 1.0 (DT)
[ 2.869297] Workqueue: usb_hub_wq hub_event
[ 2.873486] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 2.880444] pc : phylink_disconnect_phy+0xf0/0x100
[ 2.885233] lr : phylink_disconnect_phy+0xf0/0x100
[ 2.890021] sp : ffffffc08006b4d0
[ 2.893328] x29: ffffffc08006b4d0 x28: ffffff810703ea40 x27: ffffff81053e0000
[ 2.900467] x26: ffffff810703eae0 x25: ffffff810703ea80 x24: 00000000ffffffea
[ 2.907605] x23: ffffff81051ebc50 x22: ffffff81052d80b0 x21: 0000000000008000
[ 2.914742] x20: ffffff810629a000 x19: ffffff8105a50600 x18: 0000000000000006
[ 2.921879] x17: 74612074276e6163 x16: 203a2964657a696c x15: 077907680770072f
[ 2.929016] x14: 07740765076e072f x13: 077907680770072f x12: 07740765076e072f
[ 2.936152] x11: 0720072007200720 x10: 0720072007200729 x9 : ffffffe227d591ec
[ 2.943289] x8 : 0000000000000128 x7 : ffffffe22941d100 x6 : ffffffe22941d100
[ 2.950426] x5 : 3fffffffffffefff x4 : bffffffffffff000 x3 : 0000000000000000
[ 2.957563] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffffff810033de80
[ 2.964701] Call trace:
[ 2.967141] phylink_disconnect_phy+0xf0/0x100 (P)
[ 2.971932] lan78xx_probe+0x9e4/0xda0
[ 2.975679] usb_probe_interface+0x13c/0x360
[ 2.979946] really_probe+0xc4/0x2d0
[ 2.983521] __driver_probe_device+0x88/0x160
[ 2.987876] driver_probe_device+0x44/0x160
[ 2.992057] __device_attach_driver+0xc0/0x150
[ 2.996499] bus_for_each_drv+0x90/0x100
[ 3.000419] __device_attach+0xa8/0x1a0
[ 3.004252] device_initial_probe+0x58/0x68
[ 3.008433] bus_probe_device+0x40/0xb0
[ 3.012266] device_add+0x5a4/0x7b8
[ 3.015751] usb_set_configuration+0x548/0xab0
[ 3.020195] usb_generic_driver_probe+0x5c/0xa0
[ 3.024724] usb_probe_device+0x44/0x140
[ 3.028645] really_probe+0xc4/0x2d0
[ 3.032218] __driver_probe_device+0x88/0x160
[ 3.036573] driver_probe_device+0x44/0x160
[ 3.040754] __device_attach_driver+0xc0/0x150
[ 3.045195] bus_for_each_drv+0x90/0x100
[ 3.049114] __device_attach+0xa8/0x1a0
[ 3.052947] device_initial_probe+0x58/0x68
[ 3.057128] bus_probe_device+0x40/0xb0
[ 3.060961] device_add+0x5a4/0x7b8
[ 3.064446] usb_new_device+0x1bc/0x4e8
[ 3.068278] hub_event+0xde8/0x1560
[ 3.071763] process_one_work+0x164/0x4e0
[ 3.075774] worker_thread+0x19c/0x320
[ 3.079519] kthread+0x138/0x150
[ 3.082742] ret_from_fork+0x10/0x20
[ 3.086317] ---[ end trace 0000000000000000 ]---
[ 3.113678] lan78xx 1-1.3:1.0: probe with driver lan78xx failed with error -22
Due to my limited debugging skills, I put my kernel over here:
https://github.com/Schuchmann/rpi_lan78xx
added some code to maybe get a better idea:
https://github.com/Schuchmann/rpi_lan78xx/pull/1/changes
[ 2.117653] usb 1-1.3: new high-speed USB device number 3 using dwc2
[ 2.214810] usb 1-1.3: New USB device found, idVendor=0424, idProduct=7801, bcdDevice= 3.00
[ 2.214840] usb 1-1.3: New USB device strings: Mfr=1, Product=2, SerialNumber=3
[ 2.214850] usb 1-1.3: Product: LAN7801
[ 2.214857] usb 1-1.3: Manufacturer: Microchip
[ 2.214864] usb 1-1.3: SerialNumber: 00800F780100
[ 2.215258] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --1--
[ 2.215275] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --2--
[ 2.215284] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --3--
[ 2.215310] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --4--
[ 2.215334] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --5--
[ 2.215350] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --6--
[ 2.215358] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --7--
[ 2.215365] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --8--
[ 2.215373] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --9--
[ 2.215380] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --10--
[ 2.215388] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --11--
[ 2.225577] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): deferred multicast write 0x00007ca0
[ 2.272191] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): registered mdiobus bus usb-001:003
[ 2.272211] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --12--
[ 2.272234] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --13--
[ 2.272243] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): int urb period 64
[ 2.272254] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --14--
[ 2.272263] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --15--
[ 2.272270] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --16--
[ 2.272282] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --17--
[ 2.272289] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --18--
[ 2.272300] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): phy: --3.2--
[ 2.272305] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): phy: --3.3--
[ 2.272312] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): phy: --3.4--
[ 2.272316] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): phy: --3.5-- 0
[ 2.272433] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): phydev->irq = 37
[ 2.272440] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): phy: --1--
[ 2.272444] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): phy: --2--
[ 2.283504] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): phy: --3--
[ 2.283520] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): PHY usb-001:003:00 doesn't supply possible interfaces
[ 2.283528] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): PHY usb-001:003:00 --1.1--
[ 2.283533] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): PHY usb-001:003:00 --1.2--
[ 2.283538] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): phy: --3.2--
[ 2.283542] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): phy: --3.3--
[ 2.283549] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): phy: --3.4--
[ 2.283553] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): phy: --3.5-- -22
[ 2.283559] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): validation of rgmii-id with support 00000000,00000000,00000000,00006280 and advertisement 00000000,00000000,00000000,00006280 failed: -EINVAL
[ 2.288572] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): can't attach PHY to usb-001:003, error -EINVAL
[ 2.288617] ------------[ cut here ]------------
[ 2.288624] RTNL: assertion failed at drivers/net/phy/phylink.c (2377)
[ 2.288730] WARNING: drivers/net/phy/phylink.c:2377 at phylink_disconnect_phy+0xf0/0x100, CPU#0: kworker/0:2/68
[ 2.962641] Modules linked in:
[ 2.965698] CPU: 0 UID: 0 PID: 68 Comm: kworker/0:2 Not tainted 7.1.0-rc2-v8+ #12 PREEMPT
[ 2.973963] Hardware name: Raspberry Pi Compute Module 4 Rev 1.0 (DT)
[ 2.980399] Workqueue: usb_hub_wq hub_event
[ 2.984589] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 2.991547] pc : phylink_disconnect_phy+0xf0/0x100
[ 2.996335] lr : phylink_disconnect_phy+0xf0/0x100
[ 3.001122] sp : ffffffc080d034d0
[ 3.004430] x29: ffffffc080d034d0 x28: ffffff8105f0ea80 x27: ffffff8105f0eb28
[ 3.011568] x26: ffffff8105f0eae0 x25: ffffff810622bc50 x24: 00000000ffffffea
[ 3.018705] x23: ffffff81058708b0 x22: 0000000000008000 x21: ffffff8105877000
[ 3.025842] x20: ffffff8105f0ea40 x19: ffffff810027ce00 x18: 0000000000000006
[ 3.032979] x17: 74612074276e6163 x16: 203a2964657a696c x15: 077907680770072f
[ 3.040116] x14: 07740765076e072f x13: 077907680770072f x12: 07740765076e072f
[ 3.047253] x11: 0720072007200720 x10: 0720072007200729 x9 : ffffffd85f3591ec
[ 3.054390] x8 : 000000000000014a x7 : ffffffd860a1d100 x6 : ffffffd860a1d100
[ 3.061527] x5 : 3fffffffffffefff x4 : bffffffffffff000 x3 : 0000000000000000
[ 3.068664] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffffff8102419f80
[ 3.075801] Call trace:
[ 3.078241] phylink_disconnect_phy+0xf0/0x100 (P)
[ 3.083031] lan78xx_probe+0xb00/0x1208
[ 3.086865] usb_probe_interface+0x13c/0x360
[ 3.091132] really_probe+0xc4/0x2d0
[ 3.094709] __driver_probe_device+0x88/0x160
[ 3.099064] driver_probe_device+0x44/0x160
[ 3.103244] __device_attach_driver+0xc0/0x150
[ 3.107686] bus_for_each_drv+0x90/0x100
[ 3.111605] __device_attach+0xa8/0x1a0
[ 3.115438] device_initial_probe+0x58/0x68
[ 3.119618] bus_probe_device+0x40/0xb0
[ 3.123451] device_add+0x5a4/0x7b8
[ 3.126935] usb_set_configuration+0x548/0xab0
[ 3.131377] usb_generic_driver_probe+0x5c/0xa0
[ 3.135907] usb_probe_device+0x44/0x140
[ 3.139827] really_probe+0xc4/0x2d0
[ 3.143399] __driver_probe_device+0x88/0x160
[ 3.147753] driver_probe_device+0x44/0x160
[ 3.151934] __device_attach_driver+0xc0/0x150
[ 3.156375] bus_for_each_drv+0x90/0x100
[ 3.160294] __device_attach+0xa8/0x1a0
[ 3.164127] device_initial_probe+0x58/0x68
[ 3.168307] bus_probe_device+0x40/0xb0
[ 3.172140] device_add+0x5a4/0x7b8
[ 3.175624] usb_new_device+0x1bc/0x4e8
[ 3.179456] hub_event+0xde8/0x1560
[ 3.182940] process_one_work+0x164/0x4e0
[ 3.186949] worker_thread+0x19c/0x320
[ 3.190694] kthread+0x138/0x150
[ 3.193917] ret_from_fork+0x10/0x20
[ 3.197495] ---[ end trace 0000000000000000 ]---
[ 3.202212] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --out: free_urbs--
[ 3.202245] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --out: out5--
[ 3.202725] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --out: out4--
[ 3.225689] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --out: out3--
[ 3.225735] lan78xx 1-1.3:1.0 (unnamed net_device) (uninitialized): --out: out2--
So for me it happens somewhere in phylink_validate_mac_and_pcs()
Regards,
Sven
^ permalink raw reply
* [GIT PULL] wireless-next-2026-05-06
From: Johannes Berg @ 2026-05-06 11:10 UTC (permalink / raw)
To: netdev; +Cc: linux-wireless
Hi,
More content for next, as below. I'm sending this now
in hopes that we'll get net merged into net-next after
it all lands, because I'm annoyed by the use-after-free
issue that somehow we never noticed and now hits all
the time.
Please pull and let us know if there's any problem.
Thanks,
johannes
The following changes since commit 6855a52318b3a8c33031209e38bef497c971ef17:
Merge tag 'wireless-next-2026-04-30' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next (2026-04-30 17:10:21 -0700)
are available in the Git repository at:
https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git tags/wireless-next-2026-05-06
for you to fetch changes up to 89e367a90c1a877ca9c5d75d3848582d80fd0e60:
wifi: mac80211: explicitly disable FTM responder on AP stop (2026-05-06 11:52:14 +0200)
----------------------------------------------------------------
Lots of new content in cfg80211/mac80211, notably
- more NAN work, mostly complete now (also hwsim)
- more UHR work (e.g. non-primary channel access),
this will continue for a while
- FTM ranging APIs
----------------------------------------------------------------
Andrei Otcheretianski (1):
wifi: mac80211: Fix a kernel panic in ieee80211_encrypt_tx_skb()
Avraham Stern (1):
wifi: mac80211: accept protected frames for NAN device
Benjamin Berg (13):
wifi: mac80211_hwsim: remove unused nan_vif struct member
wifi: mac80211_hwsim: move NAN related variables into a struct
wifi: mac80211_hwsim: split NAN handling into separate file
wifi: mac80211_hwsim: rename and switch simulation time to boottime
wifi: mac80211_hwsim: move timestamp writing later in the datapath
wifi: mac80211_hwsim: register beacon timer by calculating TBTT
wifi: mac80211_hwsim: refactor NAN timer handling
wifi: mac80211_hwsim: switch to use TXQs
wifi: mac80211_hwsim: limit TX of frames to the NAN DW
wifi: mac80211_hwsim: select NAN TX channel based on current TSF
wifi: mac80211_hwsim: only RX on NAN when active on a slot
wifi: mac80211_hwsim: protect tsf_offset using a spinlock
wifi: mac80211_hwsim: implement NAN synchronization
Daniel Gabay (7):
wifi: mac80211_hwsim: add NAN_DATA interface limits
wifi: mac80211_hwsim: add NAN PHY capabilities
wifi: mac80211_hwsim: implement NAN schedule callbacks
wifi: mac80211_hwsim: set HAS_RATE_CONTROL when using NAN
wifi: mac80211_hwsim: add NAN data path TX/RX support
wifi: mac80211_hwsim: Declare support for secure NAN
wifi: mac80211_hwsim: enable NAN_DATA interface simulation support
Emmanuel Grumbach (1):
wifi: iwlwifi: don't blindly start the responder upon BSS_CHANGED_FTM_RESPONDER
Ilan Peer (4):
wifi: mac80211: allow userspace TX/RX over NAN Data interfaces
wifi: mac80211: Allow setting MAC address on interface creation
wifi: mac80211_hwsim: Do not declare support for NDPE
wifi: mac80211_hwsim: Support Tx of multicast data on NAN
Israel Kozitz (1):
wifi: cfg80211: fix max_channel_switch_time documentation unit
Johannes Berg (23):
wifi: mac80211: move frame RX handling to type files
wifi: mac80211: update UHR capabilities field order
wifi: ieee80211: define UHR ML-PM extended MLD capability
wifi: mac80211: track AP's extended MLD capa/ops
wifi: cfg80211: ensure UHR ML-PM flag is consistent
wifi: cfg80211: allow devices to advertise extended MLD capa/ops
wifi: mac80211: mlme: advertise driver's extended MLD capa/ops
wifi: mac80211: use struct for ieee80211_determine_ap_chan() args
wifi: mac80211: move ieee80211_chandef_usable() up
wifi: mac80211: carry element parsing frame type/from_ap
wifi: cfg80211: allow representing NPCA in chandef
wifi: cfg80211: add helper for parsing NPCA to chandef
wifi: mac80211: use NPCA in chandef for validation
wifi: mac80211: remove NPCA during chandef downgrade
wifi: mac80211: add NPCA to chandef tracing
wifi: mac80211: allow only AP chanctx sharing with NPCA
wifi: mac80211: mlme: use NPCA chandef if capable
wifi: mac80211: set AP NPCA parameters in bss_conf
wifi: cfg80211: separate NPCA validity from chandef validity
wifi: mac80211: don't parse full UHR operation from beacons
wifi: mac80211: check AP using NPCA has NPCA capability
wifi: mac80211_hwsim: claim HT STBC capability
wifi: mac80211: explicitly disable FTM responder on AP stop
Kavita Kavita (3):
wifi: cfg80211: indicate (Re)Association frame encryption to userspace
wifi: mac80211: set assoc_encrypted for EPP associations
wifi: mac80211_hwsim: Add support for extended FTM ranging
Miri Korenblit (4):
wifi: mac80211: track the id of the NAN cluster we joined
wifi: mac80211: avoid out-of-bounds access in monitor
wifi: mac80211: add NAN channel evacuation support
wifi: cfg80211: don't allow NAN DATA on multi radio devices
Peddolla Harshavardhan Reddy (12):
wifi: cfg80211: restrict LMR feedback check to TB and non-TB ranging
wifi: cfg80211: Add MAC address filter to remain_on_channel
wifi: cfg80211/mac80211: Add NL80211_IFTYPE_PD for PD PASN and PMSR operations
wifi: cfg80211: add start/stop proximity detection commands
wifi: cfg80211: add proximity detection capabilities to PMSR
wifi: cfg80211: add NTB continuous ranging and FTM request type support
wifi: cfg80211: extend PMSR FTM response for proximity ranging
wifi: cfg80211: add role-based peer limits to FTM capabilities
wifi: cfg80211: add ingress/egress distance thresholds for FTM
wifi: cfg80211: add PD-specific preamble and bandwidth capabilities
wifi: cfg80211: allow suppressing FTM result reporting for PD requests
wifi: cfg80211: add LTF keyseed support for secure ranging
MAINTAINERS | 2 +-
drivers/net/wireless/ath/ath6kl/cfg80211.c | 3 +-
drivers/net/wireless/ath/wil6210/cfg80211.c | 3 +-
.../net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 4 +-
.../net/wireless/broadcom/brcm80211/brcmfmac/p2p.h | 3 +-
drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 4 +-
.../net/wireless/intel/iwlwifi/mvm/mld-mac80211.c | 4 +-
drivers/net/wireless/marvell/mwifiex/cfg80211.c | 3 +-
drivers/net/wireless/microchip/wilc1000/cfg80211.c | 3 +-
drivers/net/wireless/virtual/Makefile | 2 +
drivers/net/wireless/virtual/mac80211_hwsim_i.h | 168 +++
.../{mac80211_hwsim.c => mac80211_hwsim_main.c} | 974 +++++++-------
drivers/net/wireless/virtual/mac80211_hwsim_nan.c | 1346 ++++++++++++++++++++
drivers/net/wireless/virtual/mac80211_hwsim_nan.h | 102 ++
include/linux/ieee80211-eht.h | 14 +-
include/linux/ieee80211-uhr.h | 55 +-
include/linux/ieee80211.h | 1 +
include/net/cfg80211.h | 251 +++-
include/net/mac80211.h | 64 +
include/uapi/linux/nl80211.h | 293 ++++-
net/mac80211/Makefile | 2 +-
net/mac80211/ap.c | 207 +++
net/mac80211/cfg.c | 77 +-
net/mac80211/chan.c | 102 +-
net/mac80211/eht.c | 174 ---
net/mac80211/ieee80211_i.h | 28 +-
net/mac80211/iface.c | 62 +-
net/mac80211/main.c | 7 +-
net/mac80211/mlme.c | 579 +++++----
net/mac80211/nan.c | 126 ++
net/mac80211/offchannel.c | 13 +-
net/mac80211/parse.c | 3 +
net/mac80211/rx.c | 33 +-
net/mac80211/status.c | 9 +-
net/mac80211/trace.h | 14 +-
net/mac80211/tx.c | 18 +-
net/mac80211/uhr.c | 5 +-
net/mac80211/util.c | 11 +-
net/wireless/chan.c | 124 +-
net/wireless/core.c | 36 +-
net/wireless/core.h | 2 +
net/wireless/mlme.c | 11 +-
net/wireless/nl80211.c | 368 +++++-
net/wireless/nl80211.h | 5 +-
net/wireless/pmsr.c | 190 ++-
net/wireless/rdev-ops.h | 26 +-
net/wireless/reg.c | 3 +
net/wireless/sme.c | 1 +
net/wireless/trace.h | 38 +-
net/wireless/util.c | 19 +-
50 files changed, 4520 insertions(+), 1072 deletions(-)
create mode 100644 drivers/net/wireless/virtual/mac80211_hwsim_i.h
rename drivers/net/wireless/virtual/{mac80211_hwsim.c => mac80211_hwsim_main.c} (91%)
create mode 100644 drivers/net/wireless/virtual/mac80211_hwsim_nan.c
create mode 100644 drivers/net/wireless/virtual/mac80211_hwsim_nan.h
create mode 100644 net/mac80211/ap.c
^ permalink raw reply
* [PATCH v2 net] net: wan: fsl_ucc_hdlc: free tx_skbuff in uhdlc_memclean
From: Holger Brunck @ 2026-05-06 11:15 UTC (permalink / raw)
To: netdev
Cc: linuxppc-dev, andrew+netdev, chleroy, qiang.zhao, horms, kuba,
Holger Brunck
When the device is removed all allocated resources should be freed.
In uhdlc_memclean the netdev transmit queue was already stopped. But at
this point we may have pending skb in the transmit queue which must be
freed. Therefore iterate over the tx_skbuff pointers and free all
pending skb. The issue was discovered by sashiko.
https://sashiko.dev/#/patchset/20260429114208.941011-1-holger.brunck%40hitachienergy.com
Fixes: c19b6d246a35 ("drivers/net: support hdlc function for QE-UCC")
Signed-off-by: Holger Brunck <holger.brunck@hitachienergy.com>
---
v2: - use dev_kfree_skb instead of kfree
- improve commit message
- add missing parameter in for statement
v1: https://lore.kernel.org/linuxppc-dev/20260504161145.2217950-1-holger.brunck@hitachienergy.com/
drivers/net/wan/fsl_ucc_hdlc.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index bc7c2e9e6554..417e8e4c111f 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -739,6 +739,8 @@ static int uhdlc_open(struct net_device *dev)
static void uhdlc_memclean(struct ucc_hdlc_private *priv)
{
+ int i;
+
qe_muram_free(ioread16be(&priv->ucc_pram->riptr));
qe_muram_free(ioread16be(&priv->ucc_pram->tiptr));
@@ -769,6 +771,11 @@ static void uhdlc_memclean(struct ucc_hdlc_private *priv)
kfree(priv->rx_skbuff);
priv->rx_skbuff = NULL;
+ for (i = 0; i < TX_BD_RING_LEN; i++) {
+ dev_kfree_skb(priv->tx_skbuff[i]);
+ priv->tx_skbuff[i] = NULL;
+ }
+
kfree(priv->tx_skbuff);
priv->tx_skbuff = NULL;
--
2.52.0.120.gb31ab939fe
^ permalink raw reply related
* Re: [PATCH v5 net-next 3/3] selftests:net: Implement ptp4l sync test using netdevsim
From: Maciek Machnikowski @ 2026-05-06 11:18 UTC (permalink / raw)
To: Jakub Kicinski
Cc: netdev, richardcochran, milena.olech, willemdebruijn.kernel,
andrew, vadim.fedorenko, horms
In-Reply-To: <20260505172317.089fb380@kernel.org>
On 06/05/2026 02:23, Jakub Kicinski wrote:
> On Tue, 5 May 2026 17:22:34 -0700 Jakub Kicinski wrote:
>> On Tue, 5 May 2026 09:36:30 +0200 Maciek Machnikowski wrote:
>>> On 04/05/2026 19:07, Jakub Kicinski wrote:
>>>> On Sun, 3 May 2026 09:47:47 +0200 Maciek Machnikowski wrote:
>>>> Doesn't seem to pass on netdevsim for us:
>>>>
>>>> # 41.13 [+40.95] # ptp4l follower did not reach locked state (s2) within 40s
>>>> # 41.13 [+0.00] # Follower log (last 10 lines): ptp4l[2179.605]: ioctl SIOCETHTOOL failed: Operation not supported | ptp4l[2179.607]: interface 'eth0' does not support requested timestamping mode | failed to create a clock
>>>> # 41.15 [+0.02] # Check| At /srv/vmksft/testing/wt-2/tools/testing/selftests/net/./ptp.py, line 173, in ptp_sync_test:
>>>> # 41.15 [+0.01] # Check| _run_ptp4l_wait_sync(nsimsv.ifname, nsimcl.ifname, nssv.name, nscl.name)
>>>> # 41.16 [+0.01] # Check| At /srv/vmksft/testing/wt-2/tools/testing/selftests/net/./ptp.py, line 99, in _run_ptp4l_wait_sync:
>>>> # 41.17 [+0.01] # Check| ksft_true(False, "PTP sync timeout")
>>>> # 41.17 [+0.00] # Check failed False does not eval to True PTP sync timeout
>>>> # 41.32 [+0.16] not ok 1 ptp.ptp_sync_test
>>>> # 41.33 [+0.00] # Totals: pass:0 fail:1 xfail:0 xpass:0 skip:0 error:0
>>>>
>>>> Anything we need to do?
>>>
>>> Can you share the config file you used? Seems the PTP clock was not
>>> found which may lead to PTP_1588_CLOCK_MOCK not being enabled?
>>
>> You have to add it to the relevant config.
>>
>> Please read: github.com/linux-netdev/nipa/wiki/Netdev-CI-system
>
> Sorry, wrong link, this:
>
> https://github.com/linux-netdev/nipa/wiki/How-to-run-netdev-selftests-CI-style
I did follow the guide, ran the test on the vng environment and it still
passed the test. Is there more debuggability on the CI system? Can you
share the .config file it generated while running the test? Or the
commands it tries to run so I can verify them locally?
The commands I ran were:
make mrproper
vng --build --config tools/testing/selftests/drivers/net/netdevsim/config
vng -v --run . --user root --cpus 4 -- tools/testing/selftests/net/ptp.py
The tests were done on the ARM64 Fedora 44 with the default kernel
6.19.14-300.fc44.aarch64 running inside the Parallels Desktop VM.
^ permalink raw reply
* Re: [PATCH v13 5/6] tls: add hardware offload key update support
From: Sabrina Dubroca @ 2026-05-06 11:30 UTC (permalink / raw)
To: Rishikesh Jethwani
Cc: netdev, saeedm, tariqt, mbloch, borisp, john.fastabend, kuba,
davem, pabeni, edumazet, leon
In-Reply-To: <20260429181016.3164935-6-rjethwani@purestorage.com>
2026-04-29, 12:10:15 -0600, Rishikesh Jethwani wrote:
> On RX, the NIC may have already decrypted in-flight records with
> the old key before the peer's KeyUpdate is parsed, so the old
> AEAD, IV and rec_seq are retained on tls_offload_context_rx.
> tls_check_pending_rekey() invokes tls_device_rx_del_key() to drop
> the NIC key; otherwise post-KeyUpdate records (carrying new-key
> wire encryption) would be XOR'd with the retired key.
> tls_device_decrypted() classifies records by old_nic_boundary:
>
> - after the boundary: new-key record; drop the old key.
> - before, fully encrypted: advance old_rec_seq, let SW AEAD decrypt.
> - before, (partially) decrypted: reencrypt with the old key so SW
> AEAD can decrypt with the new key.
>
> For mixed records skb->decrypted flags can be wrong (NIC clears
> them on auth failure); on -EBADMSG, tls_rx_rekey_retry() toggles
> those flags, decrements old_rec_seq to reuse the nonce, and
> retries once (gated by old_key_reencrypted).
Not blaming you for NIC behavior, but... the NIC passes up as
"decrypted" records that have failed decryption (because it was using
the wrong (old) key), or passes as "encrypted" the incorrectly
decrypted data (that it has "decrypted" with the old key)?
Or this is only the first record(s) after the KeyUpdate message, if
they fall within the same packet, the whole packet was "decrypted"
with the old key but only the KeyUpdate itself (and maybe some more
records before it) decrypted correctly; but subsequent packets get
passed as !decrypted and don't need this reencrypt dance?
(this is maybe more of a question for Tariq or the other @nvidia
folks)
I haven't reviewed the whole patch at this point, because of Paolo's
suggestion and this confusion with the RX rekey.
> diff --git a/include/net/tls.h b/include/net/tls.h
> index ebd2550280ae..6891aa6b484c 100644
> --- a/include/net/tls.h
> +++ b/include/net/tls.h
[...]
> @@ -165,6 +181,11 @@ struct tls_offload_context_tx {
> void (*sk_destruct)(struct sock *sk);
> struct work_struct destruct_work;
> struct tls_context *ctx;
> +
> + struct tls_sw_context_tx rekey_sw; /* SW context for new key */
> + struct cipher_context rekey_tx; /* IV, rec_seq for new key */
> + union tls_crypto_context rekey_crypto_send; /* Crypto for new key */
[...]
> @@ -253,6 +273,15 @@ struct tls_context {
> */
> unsigned long flags;
>
> + /* TCP sequence number boundary for pending rekey.
> + * Packets with seq < this use old key, >= use new key.
> + */
> + u32 rekey_boundary_seq;
> +
> + /* Pointers to rekey contexts for SW encryption with new key */
> + struct tls_sw_context_tx *rekey_sw_ctx;
> + struct cipher_context *rekey_cipher_ctx;
> +
[...]
> @@ -311,6 +340,14 @@ struct tls_offload_context_rx {
> u8 resync_nh_reset:1;
> /* CORE_NEXT_HINT-only member, but use the hole here */
> u8 resync_nh_do_now:1;
> + /* retry reencrypt of mixed record during rekey */
> + u8 old_key_reencrypted:1;
> + /* tls_dev_add deferred until old key is freed */
> + u8 dev_add_pending:1;
> + struct crypto_aead *old_aead_recv; /* old key AEAD cipher */
> + char old_iv[TLS_MAX_IV_SIZE + TLS_MAX_SALT_SIZE]; /* old key IV */
> + char old_rec_seq[TLS_MAX_REC_SEQ_SIZE]; /* old key TLS record seq */
> + u32 old_nic_boundary; /* TCP seq: NIC switched to next key */
I think it would be nice to group the new fields in those 3 structs
into embedded "untyped" structs:
struct tls_offload_context_rx {
/* existing fields */
struct {
struct crypto_aead *old_aead_recv; /* old key AEAD cipher */
char old_iv[TLS_MAX_IV_SIZE + TLS_MAX_SALT_SIZE]; /* old key IV */
char old_rec_seq[TLS_MAX_REC_SEQ_SIZE]; /* old key TLS record seq */
u32 old_nic_boundary; /* TCP seq: NIC switched to next key */
} rekey;
/* other fields */
};
and then remove the "rekey_" prefixes in
tls_offload_context_tx/tls_context.
[note to self: in the near future we should probably switch all those
comments to kdoc, and consider splitting the offload_contexts into
NIC-visible and internal-only chunks]
> diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
> index cd26873e9063..51f1cc783336 100644
> --- a/net/tls/tls_device.c
> +++ b/net/tls/tls_device.c
[...]
> @@ -159,6 +161,262 @@ static void delete_all_records(struct tls_offload_context_tx *offload_ctx)
> +static bool tls_has_unacked_records(struct tls_offload_context_tx *offload_ctx)
> +{
[...]
> +static int tls_device_init_rekey_sw(struct sock *sk,
> + struct tls_context *ctx,
> + struct tls_offload_context_tx *offload_ctx,
> + struct tls_crypto_info *new_crypto_info)
> +{
[...]
> +static int tls_device_start_rekey(struct sock *sk,
> + struct tls_context *ctx,
> + struct tls_offload_context_tx *offload_ctx,
> + struct tls_crypto_info *new_crypto_info)
> +{
[...]
> +static int tls_device_complete_rekey(struct sock *sk, struct tls_context *ctx)
> +{
For tls_device_complete_rekey(): refactor the bits that overlap with
tls_set_device_offload_initial() into a shared helper?
I think it'd be better to reshuffle all this so that
tls_has_unacked_records is just next to tls_tcp_clean_acked (because
they do something a bit similar, both deal with unacked records), and
move those 3 rekey functions next to tls_set_device_offload_rekey().
> @@ -980,13 +1269,144 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
> return err;
> }
>
> +/*
> + * temporarily swap in the old key, run
> + * tls_device_reencrypt(), then restore the current key.
> + */
> +static int tls_old_key_reencrypt(struct sock *sk,
> + struct tls_offload_context_rx *ctx,
> + struct tls_sw_context_rx *sw_ctx,
> + struct tls_context *tls_ctx)
> +{
Why a separate helper (with a very confusing name), with then a
wrapper that does almost nothing else than call tls_old_key_reencrypt()?
[...]
> +static int tls_device_reencrypt_old_key(struct sock *sk,
> + struct tls_offload_context_rx *ctx,
> + struct tls_sw_context_rx *sw_ctx,
> + struct tls_context *tls_ctx)
> +{
[...]
> +void tls_device_rx_del_key(struct sock *sk, struct tls_context *ctx)
> +{
[...]
> +static int tls_device_dev_add(struct sock *sk, struct tls_context *tls_ctx,
> + struct net_device *netdev,
> + struct tls_crypto_info *crypto_info,
> + u32 cur_seq, bool is_rekey)
> +{
[...]
> +static void tls_device_deferred_dev_add(struct sock *sk,
> + struct tls_context *tls_ctx,
> + struct tls_offload_context_rx *ctx)
> +{
Same here, in terms of code organization, I'd move those 3
->tls_dev_{add,del} wrappers to the top of the file, so that the
reencrypt/decrypted/old_key function(s) stay grouped together.
> int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx)
> {
[...]
> diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
> index fd04857fa0ab..ab701f166b57 100644
> --- a/net/tls/tls_main.c
> +++ b/net/tls/tls_main.c
> @@ -371,6 +371,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
>
> if (ctx->tx_conf == TLS_SW)
> tls_sw_cancel_work_tx(ctx);
> + else if (ctx->tx_conf == TLS_HW && ctx->rekey_sw_ctx)
> + tls_sw_cancel_work_tx(ctx);
>
> lock_sock(sk);
> free_ctx = ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW;
> @@ -711,64 +713,68 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
> }
>
> if (tx) {
> - if (update && ctx->tx_conf == TLS_HW) {
> - rc = -EOPNOTSUPP;
> - goto err_crypto_info;
> - }
> -
> - if (!update) {
> - rc = tls_set_device_offload(sk);
> - conf = TLS_HW;
> - if (!rc) {
> + rc = tls_set_device_offload(sk, update ? crypto_info : NULL);
> + conf = TLS_HW;
> + if (!rc) {
> + if (update) {
> + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXREKEYOK);
tls_set_device_offload* could succeed without completing the rekey, so
we would increment this counter, and then the async rekey completion
could fail. In that case, we will end up with 2 stats increases for a
single rekey.
Probably better to add a pair of REKEYINPROGRESS counters (similar to
the CURR* counters) that get incremented if we start a rekey but can't
finish it immediately, and decremented when we complete the rekey (and
also increment the corresponding REKEYOK/REKEYFAIL counter at that
time). If we're able to complete the rekey before returning from
do_tls_setsockopt_conf, we would increment the REKEYOK/REKEYFAIL
immediately.
This would avoid the inconsistent number of stats increases, and allow
users to see that a rekey is in progress.
> + } else {
> TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE);
> TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE);
> - goto out;
> }
> - }
> -
> - rc = tls_set_sw_offload(sk, 1, update ? crypto_info : NULL);
> - if (rc)
> + } else if (update && ctx->tx_conf == TLS_HW) {
> + /* HW rekey failed - return the actual error.
> + * Cannot fall back to SW for an existing HW connection.
> + */
> goto err_crypto_info;
In this case, the REKEYHWFAIL will not always be incremented (for
example when tls_device_start_rekey fails, or tls_sw_ctx_init on the
RX side).
[...]
> diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
> index 1412b3dcce4c..fc60e8c0f24c 100644
> --- a/net/tls/tls_sw.c
> +++ b/net/tls/tls_sw.c
[...]
> @@ -1802,6 +1801,8 @@ static int tls_check_pending_rekey(struct sock *sk, struct tls_context *ctx,
> if (hs_type == TLS_HANDSHAKE_KEYUPDATE) {
> struct tls_sw_context_rx *rx_ctx = ctx->priv_ctx_rx;
>
> + /* Stop NIC from XOR-ing post-KU records with the retired key */
I think "tls_device_rx_del_key" tells us everything we need to know in
this context, and "XOR-ing post-KU records" is just noise here. Please
remove this comment.
> @@ -2714,11 +2758,7 @@ static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct soc
> sw_ctx_tx = ctx->priv_ctx_tx;
> }
>
> - crypto_init_wait(&sw_ctx_tx->async_wait);
> - atomic_set(&sw_ctx_tx->encrypt_pending, 1);
> - INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
> - INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
> - sw_ctx_tx->tx_work.sk = sk;
> + tls_sw_ctx_tx_init(sk, sw_ctx_tx);
>
> return sw_ctx_tx;
> }
> @@ -2861,11 +2901,9 @@ int tls_sw_ctx_init(struct sock *sk, int tx,
> goto free_aead;
> }
>
> - if (!new_crypto_info) {
> - rc = crypto_aead_setauthsize(*aead, prot->tag_size);
> - if (rc)
> - goto free_aead;
> - }
> + rc = crypto_aead_setauthsize(*aead, prot->tag_size);
> + if (rc)
> + goto free_aead;
If you're going to do this (I'm not sure why we want this), fix up the
comment just above crypto_aead_setkey (8 lines above this) that says
"setkey is the last operation that could fail during a rekey", or do
the change in a way that setkey is still the last op that can fail.
--
Sabrina
^ permalink raw reply
* [PATCH v4] Bluetooth: serialize accept_q access
From: Ren Wei @ 2026-05-06 11:43 UTC (permalink / raw)
To: linux-bluetooth, netdev
Cc: marcel, luiz.dentz, davem, edumazet, kuba, pabeni, horms, jannh,
yuantan098, yifanwucs, tomapufckgml, bird, wangjiexun2025, n05ec
From: Jiexun Wang <wangjiexun2025@gmail.com>
bt_sock_poll() walks the accept queue without synchronization, while
child teardown can unlink the same socket and drop its last reference.
The unsynchronized accept queue walk has existed since the initial
Bluetooth import.
Protect accept_q with a dedicated lock for queue updates and polling.
Also rework bt_accept_dequeue() to take temporary child references under
the queue lock before dropping it and locking the child socket.
Fixes: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org
Reported-by: Jann Horn <jannh@google.com>
Reported-by: Yuan Tan <yuantan098@gmail.com>
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Reported-by: Xin Liu <bird@lzu.edu.cn>
Signed-off-by: Jiexun Wang <wangjiexun2025@gmail.com>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
---
Changes in v4:
- no functional changes
- clarify that the race dates back to the initial Bluetooth import
- update trailers
I noticed Jann also proposed a fix at
https://patchwork.kernel.org/project/bluetooth/patch/20260504-bluetooth-accept-uaf-fix-v1-1-1ca63c0efadd@google.com/,
so we're adding his Reported-by tag here. Please let us know if this
isn't appropriate.
- v3 Link: https://lore.kernel.org/all/20260404162324.2789862-1-n05ec@lzu.edu.cn/
Changes in v3:
- move sk_acceptq_added()/sk_acceptq_removed() inside accept_q_lock
critical sections to serialize sk_ack_backlog updates with accept_q
operations
- v2 Link: https://lore.kernel.org/all/06a6b4549acba207847ce532dedbf1c95ab22d13.1774925231.git.wangjiexun2025@gmail.com/
Changes in v2:
- add Tested-by: Ren Wei <enjou1224z@gmail.com>
- resend to the public Bluetooth/netdev lists
include/net/bluetooth/bluetooth.h | 1 +
net/bluetooth/af_bluetooth.c | 87 +++++++++++++++++++++++--------
2 files changed, 66 insertions(+), 22 deletions(-)
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 69eed69f7f26..3faea66b1979 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -398,6 +398,7 @@ void baswap(bdaddr_t *dst, const bdaddr_t *src);
struct bt_sock {
struct sock sk;
struct list_head accept_q;
+ spinlock_t accept_q_lock; /* protects accept_q */
struct sock *parent;
unsigned long flags;
void (*skb_msg_name)(struct sk_buff *, void *, int *);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 2b94e2077203..fa14b9a915eb 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -154,6 +154,7 @@ struct sock *bt_sock_alloc(struct net *net, struct socket *sock,
sock_init_data(sock, sk);
INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
+ spin_lock_init(&bt_sk(sk)->accept_q_lock);
sock_reset_flag(sk, SOCK_ZAPPED);
@@ -214,6 +215,7 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
{
const struct cred *old_cred;
struct pid *old_pid;
+ struct bt_sock *par = bt_sk(parent);
BT_DBG("parent %p, sk %p", parent, sk);
@@ -224,9 +226,13 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
else
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
- list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q);
bt_sk(sk)->parent = parent;
+ spin_lock_bh(&par->accept_q_lock);
+ list_add_tail(&bt_sk(sk)->accept_q, &par->accept_q);
+ sk_acceptq_added(parent);
+ spin_unlock_bh(&par->accept_q_lock);
+
/* Copy credentials from parent since for incoming connections the
* socket is allocated by the kernel.
*/
@@ -244,8 +250,6 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
bh_unlock_sock(sk);
else
release_sock(sk);
-
- sk_acceptq_added(parent);
}
EXPORT_SYMBOL(bt_accept_enqueue);
@@ -254,45 +258,72 @@ EXPORT_SYMBOL(bt_accept_enqueue);
*/
void bt_accept_unlink(struct sock *sk)
{
+ struct sock *parent = bt_sk(sk)->parent;
+
BT_DBG("sk %p state %d", sk, sk->sk_state);
+ spin_lock_bh(&bt_sk(parent)->accept_q_lock);
list_del_init(&bt_sk(sk)->accept_q);
- sk_acceptq_removed(bt_sk(sk)->parent);
+ sk_acceptq_removed(parent);
+ spin_unlock_bh(&bt_sk(parent)->accept_q_lock);
bt_sk(sk)->parent = NULL;
sock_put(sk);
}
EXPORT_SYMBOL(bt_accept_unlink);
+static struct sock *bt_accept_get(struct sock *parent, struct sock *sk)
+{
+ struct bt_sock *bt = bt_sk(parent);
+ struct sock *next = NULL;
+
+ /* accept_q is modified from child teardown paths too, so take a
+ * temporary reference before dropping the queue lock.
+ */
+ spin_lock_bh(&bt->accept_q_lock);
+
+ if (sk) {
+ if (bt_sk(sk)->parent != parent)
+ goto out;
+
+ if (!list_is_last(&bt_sk(sk)->accept_q, &bt->accept_q)) {
+ next = &list_next_entry(bt_sk(sk), accept_q)->sk;
+ sock_hold(next);
+ }
+ } else if (!list_empty(&bt->accept_q)) {
+ next = &list_first_entry(&bt->accept_q,
+ struct bt_sock, accept_q)->sk;
+ sock_hold(next);
+ }
+
+out:
+ spin_unlock_bh(&bt->accept_q_lock);
+ return next;
+}
+
struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
{
- struct bt_sock *s, *n;
- struct sock *sk;
+ struct sock *sk, *next;
BT_DBG("parent %p", parent);
restart:
- list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) {
- sk = (struct sock *)s;
-
+ for (sk = bt_accept_get(parent, NULL); sk; sk = next) {
/* Prevent early freeing of sk due to unlink and sock_kill */
- sock_hold(sk);
lock_sock(sk);
/* Check sk has not already been unlinked via
* bt_accept_unlink() due to serialisation caused by sk locking
*/
- if (!bt_sk(sk)->parent) {
+ if (bt_sk(sk)->parent != parent) {
BT_DBG("sk %p, already unlinked", sk);
release_sock(sk);
sock_put(sk);
- /* Restart the loop as sk is no longer in the list
- * and also avoid a potential infinite loop because
- * list_for_each_entry_safe() is not thread safe.
- */
goto restart;
}
+ next = bt_accept_get(parent, sk);
+
/* sk is safely in the parent list so reduce reference count */
sock_put(sk);
@@ -310,6 +341,8 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
sock_graft(sk, newsock);
release_sock(sk);
+ if (next)
+ sock_put(next);
return sk;
}
@@ -518,18 +551,28 @@ EXPORT_SYMBOL(bt_sock_stream_recvmsg);
static inline __poll_t bt_accept_poll(struct sock *parent)
{
- struct bt_sock *s, *n;
+ struct bt_sock *bt = bt_sk(parent);
+ struct bt_sock *s;
struct sock *sk;
+ __poll_t mask = 0;
+
+ spin_lock_bh(&bt->accept_q_lock);
+ list_for_each_entry(s, &bt->accept_q, accept_q) {
+ int state;
- list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) {
sk = (struct sock *)s;
- if (sk->sk_state == BT_CONNECTED ||
- (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) &&
- sk->sk_state == BT_CONNECT2))
- return EPOLLIN | EPOLLRDNORM;
+ state = READ_ONCE(sk->sk_state);
+
+ if (state == BT_CONNECTED ||
+ (test_bit(BT_SK_DEFER_SETUP, &bt->flags) &&
+ state == BT_CONNECT2)) {
+ mask = EPOLLIN | EPOLLRDNORM;
+ break;
+ }
}
+ spin_unlock_bh(&bt->accept_q_lock);
- return 0;
+ return mask;
}
__poll_t bt_sock_poll(struct file *file, struct socket *sock,
--
2.34.1
^ permalink raw reply related
* Re: [PATCH v5 14/16] wifi: ath12k: Switch to generic PAS TZ APIs
From: Sumit Garg @ 2026-05-06 11:44 UTC (permalink / raw)
To: Jeff Johnson
Cc: andersson, konradybcio, Sowmiya Sree Elavalagan, linux-arm-msm,
devicetree, dri-devel, freedreno, linux-media, netdev,
linux-wireless, ath12k, linux-remoteproc, robh, krzk+dt, conor+dt,
robin.clark, sean, akhilpo, lumag, abhinav.kumar, jesszhan0024,
marijn.suijten, airlied, simona, vikash.garodia, dikshita.agarwal,
bod, mchehab, elder, andrew+netdev, davem, edumazet, kuba, pabeni,
jjohnson, mathieu.poirier, trilokkumar.soni, mukesh.ojha,
pavan.kondeti, jorge.ramirez, tonyh, vignesh.viswanathan,
srinivas.kandagatla, amirreza.zarrabi, jens.wiklander, op-tee,
apurupa, skare, linux-kernel, Sumit Garg
In-Reply-To: <ae68bdb3-d683-4e7a-a27f-739214656ce2@oss.qualcomm.com>
On Tue, May 05, 2026 at 07:50:03AM -0700, Jeff Johnson wrote:
> On 5/5/2026 7:27 AM, Jeff Johnson wrote:
> > On 5/4/2026 6:06 AM, Sumit Garg wrote:
> >> @@ -485,9 +485,9 @@ static void ath12k_ahb_power_down(struct ath12k_base *ab, bool is_suspend)
> >> pasid = (u32_encode_bits(ab_ahb->userpd_id, ATH12K_USERPD_ID_MASK)) |
> >> ATH12K_AHB_UPD_SWID;
> >> /* Release the firmware */
> >> - ret = qcom_scm_pas_shutdown(pasid);
> >> + ret = qcom_pas_shutdown(pasid);
> >> if (ret)
> >> - ath12k_err(ab, "scm pas shutdown failed for userPD%d\n",
> >> + ath12k_err(ab, "pas shutdown failed for userPD%d: %d\n",
> >> ab_ahb->userpd_id);
> >
> > at some point the "ret" param was dropped, and this now generates build warnings
> The 'ret' param was dropped by:
> 8fb66931fe31 ("wifi: ath12k: Enable IPQ5424 WiFi device support")
>
> Not sure if that was on purpose or accidental. Sowmiya?
>
> - if (ret)
> - ath12k_err(ab, "scm pas shutdown failed for userPD%d: %d\n",
> - ab_ahb->userpd_id, ret);
> ...
> + if (ret)
> + ath12k_err(ab, "scm pas shutdown failed for userPD%d\n",
> + ab_ahb->userpd_id);
>
Ah, I see. I'm not sure why I didn't see any build issues in the defconfig
builds. Maybe this driver isn't enabled there?
Anyhow, I can add the "ret" back here.
-Sumit
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox