From: Joe Damato <joe@dama.to>
To: netdev@vger.kernel.org, Michael Chan <michael.chan@broadcom.com>,
Pavan Chebbi <pavan.chebbi@broadcom.com>,
Andrew Lunn <andrew+netdev@lunn.ch>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Cc: horms@kernel.org, linux-kernel@vger.kernel.org, leon@kernel.org,
Joe Damato <joe@dama.to>
Subject: [net-next v5 09/12] net: bnxt: Add SW GSO completion and teardown support
Date: Mon, 23 Mar 2026 11:38:34 -0700 [thread overview]
Message-ID: <20260323183844.3146982-10-joe@dama.to> (raw)
In-Reply-To: <20260323183844.3146982-1-joe@dama.to>
Update __bnxt_tx_int and bnxt_free_one_tx_ring_skbs to handle SW GSO
segments:
- MID segments: adjust tx_pkts/tx_bytes accounting and skip skb free
(the skb is shared across all segments and freed only once)
- LAST segments: if the DMA IOVA path was used, use dma_iova_destroy to
tear down the contiguous mapping. On the fallback path, payload DMA
unmapping is handled by the existing per-BD dma_unmap_len walk.
Both MID and LAST completions advance tx_inline_cons to release the
segment's inline header slot back to the ring.
is_sw_gso is initialized to zero, so the new code paths are not run.
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Joe Damato <joe@dama.to>
---
v5:
- Added Pavan's Reviewed-by. No functional changes.
v3:
- completion paths updated to use DMA IOVA APIs to tear down mappings.
rfcv2:
- Update the shared header buffer consumer on TX completion.
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 82 +++++++++++++++++--
.../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 19 ++++-
2 files changed, 91 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 2759a4e2b148..40a16f96feba 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -74,6 +74,8 @@
#include "bnxt_debugfs.h"
#include "bnxt_coredump.h"
#include "bnxt_hwmon.h"
+#include "bnxt_gso.h"
+#include <net/tso.h>
#define BNXT_TX_TIMEOUT (5 * HZ)
#define BNXT_DEF_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_HW | \
@@ -817,12 +819,13 @@ static bool __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
bool rc = false;
while (RING_TX(bp, cons) != hw_cons) {
- struct bnxt_sw_tx_bd *tx_buf;
+ struct bnxt_sw_tx_bd *tx_buf, *head_buf;
struct sk_buff *skb;
bool is_ts_pkt;
int j, last;
tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)];
+ head_buf = tx_buf;
skb = tx_buf->skb;
if (unlikely(!skb)) {
@@ -869,6 +872,23 @@ static bool __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
DMA_TO_DEVICE, 0);
}
}
+
+ if (unlikely(head_buf->is_sw_gso)) {
+ txr->tx_inline_cons++;
+ if (head_buf->is_sw_gso == BNXT_SW_GSO_LAST) {
+ if (dma_use_iova(&head_buf->iova_state))
+ dma_iova_destroy(&pdev->dev,
+ &head_buf->iova_state,
+ head_buf->iova_total_len,
+ DMA_TO_DEVICE, 0);
+ } else {
+ tx_pkts--;
+ tx_bytes -= skb->len;
+ skb = NULL;
+ }
+ head_buf->is_sw_gso = 0;
+ }
+
if (unlikely(is_ts_pkt)) {
if (BNXT_CHIP_P5(bp)) {
/* PTP worker takes ownership of the skb */
@@ -3420,6 +3440,7 @@ static void bnxt_free_one_tx_ring_skbs(struct bnxt *bp,
for (i = 0; i < max_idx;) {
struct bnxt_sw_tx_bd *tx_buf = &txr->tx_buf_ring[i];
+ struct bnxt_sw_tx_bd *head_buf = tx_buf;
struct sk_buff *skb;
int j, last;
@@ -3472,7 +3493,20 @@ static void bnxt_free_one_tx_ring_skbs(struct bnxt *bp,
DMA_TO_DEVICE, 0);
}
}
- dev_kfree_skb(skb);
+ if (head_buf->is_sw_gso) {
+ txr->tx_inline_cons++;
+ if (head_buf->is_sw_gso == BNXT_SW_GSO_LAST) {
+ if (dma_use_iova(&head_buf->iova_state))
+ dma_iova_destroy(&pdev->dev,
+ &head_buf->iova_state,
+ head_buf->iova_total_len,
+ DMA_TO_DEVICE, 0);
+ } else {
+ skb = NULL;
+ }
+ }
+ if (skb)
+ dev_kfree_skb(skb);
}
netdev_tx_reset_queue(netdev_get_tx_queue(bp->dev, idx));
}
@@ -3998,9 +4032,9 @@ static void bnxt_free_tx_inline_buf(struct bnxt_tx_ring_info *txr,
txr->tx_inline_size = 0;
}
-static int __maybe_unused bnxt_alloc_tx_inline_buf(struct bnxt_tx_ring_info *txr,
- struct pci_dev *pdev,
- unsigned int size)
+static int bnxt_alloc_tx_inline_buf(struct bnxt_tx_ring_info *txr,
+ struct pci_dev *pdev,
+ unsigned int size)
{
txr->tx_inline_buf = kmalloc(size, GFP_KERNEL);
if (!txr->tx_inline_buf)
@@ -4103,6 +4137,14 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
sizeof(struct tx_push_bd);
txr->data_mapping = cpu_to_le64(mapping);
}
+ if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP) &&
+ (bp->dev->features & NETIF_F_GSO_UDP_L4)) {
+ rc = bnxt_alloc_tx_inline_buf(txr, pdev,
+ BNXT_SW_USO_MAX_SEGS *
+ TSO_HEADER_SIZE);
+ if (rc)
+ return rc;
+ }
qidx = bp->tc_to_qidx[j];
ring->queue_id = bp->q_info[qidx].queue_id;
spin_lock_init(&txr->xdp_tx_lock);
@@ -4645,6 +4687,10 @@ static int bnxt_init_tx_rings(struct bnxt *bp)
bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2,
BNXT_MIN_TX_DESC_CNT);
+ if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP) &&
+ (bp->dev->features & NETIF_F_GSO_UDP_L4))
+ bp->tx_wake_thresh = max_t(int, bp->tx_wake_thresh,
+ BNXT_SW_USO_MAX_DESCS);
for (i = 0; i < bp->tx_nr_rings; i++) {
struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
@@ -13833,6 +13879,11 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev,
if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp, false))
features &= ~NETIF_F_NTUPLE;
+ if ((features & NETIF_F_GSO_UDP_L4) &&
+ !(bp->flags & BNXT_FLAG_UDP_GSO_CAP) &&
+ bp->tx_ring_size < 2 * BNXT_SW_USO_MAX_DESCS)
+ features &= ~NETIF_F_GSO_UDP_L4;
+
if ((bp->flags & BNXT_FLAG_NO_AGG_RINGS) || bp->xdp_prog)
features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW);
@@ -13878,6 +13929,15 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
int rc = 0;
bool re_init = false;
+ if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP)) {
+ if (features & NETIF_F_GSO_UDP_L4)
+ bp->tx_wake_thresh = max_t(int, bp->tx_wake_thresh,
+ BNXT_SW_USO_MAX_DESCS);
+ else
+ bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2,
+ BNXT_MIN_TX_DESC_CNT);
+ }
+
flags &= ~BNXT_FLAG_ALL_CONFIG_FEATS;
if (features & NETIF_F_GRO_HW)
flags |= BNXT_FLAG_GRO;
@@ -16881,8 +16941,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM |
NETIF_F_GSO_PARTIAL | NETIF_F_RXHASH |
NETIF_F_RXCSUM | NETIF_F_GRO;
- if (bp->flags & BNXT_FLAG_UDP_GSO_CAP)
- dev->hw_features |= NETIF_F_GSO_UDP_L4;
+ dev->hw_features |= NETIF_F_GSO_UDP_L4;
if (BNXT_SUPPORTS_TPA(bp))
dev->hw_features |= NETIF_F_LRO;
@@ -16915,8 +16974,15 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dev->priv_flags |= IFF_UNICAST_FLT;
netif_set_tso_max_size(dev, GSO_MAX_SIZE);
- if (bp->tso_max_segs)
+ if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP)) {
+ u16 max_segs = BNXT_SW_USO_MAX_SEGS;
+
+ if (bp->tso_max_segs)
+ max_segs = min_t(u16, max_segs, bp->tso_max_segs);
+ netif_set_tso_max_segs(dev, max_segs);
+ } else if (bp->tso_max_segs) {
netif_set_tso_max_segs(dev, bp->tso_max_segs);
+ }
dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_RX_SG;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 48e8e3be70d3..44b3fd18fcbe 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -33,6 +33,7 @@
#include "bnxt_xdp.h"
#include "bnxt_ptp.h"
#include "bnxt_ethtool.h"
+#include "bnxt_gso.h"
#include "bnxt_nvm_defs.h" /* NVRAM content constant and structure defs */
#include "bnxt_fw_hdr.h" /* Firmware hdr constant and structure defs */
#include "bnxt_coredump.h"
@@ -852,12 +853,18 @@ static int bnxt_set_ringparam(struct net_device *dev,
u8 tcp_data_split = kernel_ering->tcp_data_split;
struct bnxt *bp = netdev_priv(dev);
u8 hds_config_mod;
+ int rc;
if ((ering->rx_pending > BNXT_MAX_RX_DESC_CNT) ||
(ering->tx_pending > BNXT_MAX_TX_DESC_CNT) ||
(ering->tx_pending < BNXT_MIN_TX_DESC_CNT))
return -EINVAL;
+ if ((dev->features & NETIF_F_GSO_UDP_L4) &&
+ !(bp->flags & BNXT_FLAG_UDP_GSO_CAP) &&
+ ering->tx_pending < 2 * BNXT_SW_USO_MAX_DESCS)
+ return -EINVAL;
+
hds_config_mod = tcp_data_split != dev->cfg->hds_config;
if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED && hds_config_mod)
return -EINVAL;
@@ -882,9 +889,17 @@ static int bnxt_set_ringparam(struct net_device *dev,
bp->tx_ring_size = ering->tx_pending;
bnxt_set_ring_params(bp);
- if (netif_running(dev))
- return bnxt_open_nic(bp, false, false);
+ if (netif_running(dev)) {
+ rc = bnxt_open_nic(bp, false, false);
+ if (rc)
+ return rc;
+ }
+ /* ring size changes may affect features (SW USO requires a minimum
+ * ring size), so recalculate features to ensure the correct features
+ * are blocked/available.
+ */
+ netdev_update_features(dev);
return 0;
}
--
2.52.0
next prev parent reply other threads:[~2026-03-23 18:39 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-23 18:38 [net-next v5 00/12] Add TSO map-once DMA helpers and bnxt SW USO support Joe Damato
2026-03-23 18:38 ` [net-next v5 01/12] net: tso: Introduce tso_dma_map Joe Damato
2026-03-23 18:38 ` [net-next v5 02/12] net: tso: Add tso_dma_map helpers Joe Damato
2026-03-23 18:38 ` [net-next v5 03/12] net: bnxt: Export bnxt_xmit_get_cfa_action Joe Damato
2026-03-23 18:38 ` [net-next v5 04/12] net: bnxt: Add a helper for tx_bd_ext Joe Damato
2026-03-23 18:38 ` [net-next v5 05/12] net: bnxt: Use dma_unmap_len for TX completion unmapping Joe Damato
2026-03-23 18:38 ` [net-next v5 06/12] net: bnxt: Add TX inline buffer infrastructure Joe Damato
2026-03-23 18:38 ` [net-next v5 07/12] net: bnxt: Add boilerplate GSO code Joe Damato
2026-03-23 18:38 ` [net-next v5 08/12] net: bnxt: Implement software USO Joe Damato
2026-03-23 18:38 ` Joe Damato [this message]
2026-03-26 12:39 ` [net-next v5 09/12] net: bnxt: Add SW GSO completion and teardown support Paolo Abeni
2026-03-26 17:02 ` Joe Damato
2026-03-26 17:20 ` Paolo Abeni
2026-03-23 18:38 ` [net-next v5 10/12] net: bnxt: Dispatch to SW USO Joe Damato
2026-03-23 18:38 ` [net-next v5 11/12] net: netdevsim: Add support for " Joe Damato
2026-03-23 18:38 ` [net-next v5 12/12] selftests: drv-net: Add USO test Joe Damato
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260323183844.3146982-10-joe@dama.to \
--to=joe@dama.to \
--cc=andrew+netdev@lunn.ch \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=kuba@kernel.org \
--cc=leon@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=michael.chan@broadcom.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=pavan.chebbi@broadcom.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox