From: Aditya Garg <gargaditya@linux.microsoft.com>
To: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
decui@microsoft.com, andrew+netdev@lunn.ch, davem@davemloft.net,
edumazet@google.com, kuba@kernel.org, pabeni@redhat.com,
longli@microsoft.com, kotaranov@microsoft.com, horms@kernel.org,
shradhagupta@linux.microsoft.com, ssengar@linux.microsoft.com,
ernis@linux.microsoft.com, dipayanroy@linux.microsoft.com,
shirazsaleem@microsoft.com, leon@kernel.org, mlevitsk@redhat.com,
yury.norov@gmail.com, linux-hyperv@vger.kernel.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-rdma@vger.kernel.org, gargaditya@microsoft.com
Cc: Aditya Garg <gargaditya@linux.microsoft.com>
Subject: [PATCH net-next v3 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit
Date: Tue, 11 Nov 2025 00:13:00 -0800 [thread overview]
Message-ID: <1762848781-357-2-git-send-email-gargaditya@linux.microsoft.com> (raw)
In-Reply-To: <1762848781-357-1-git-send-email-gargaditya@linux.microsoft.com>
The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
per TX WQE. Exceeding this limit can cause TX failures.
Add ndo_features_check() callback to validate SKB layout before
transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
NETIF_F_GSO_MASK to enforce software segmentation in the stack.
Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
exceed the SGE limit.
Also, Add ethtool counter for SKBs linearized
Co-developed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 37 ++++++++++++++++++-
.../ethernet/microsoft/mana/mana_ethtool.c | 2 +
include/net/mana/gdma.h | 6 ++-
include/net/mana/mana.h | 1 +
4 files changed, 43 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index cccd5b63cee6..67ae5421f9ee 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/pci.h>
#include <linux/export.h>
+#include <linux/skbuff.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
@@ -329,6 +330,20 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
cq = &apc->tx_qp[txq_idx].tx_cq;
tx_stats = &txq->stats;
+ if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
+ skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+ /* GSO skb with Hardware SGE limit exceeded is not expected here
+ * as they are handled in mana_features_check() callback
+ */
+ if (skb_linearize(skb)) {
+ netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
+ skb_shinfo(skb)->nr_frags,
+ skb_is_gso(skb));
+ goto tx_drop_count;
+ }
+ apc->eth_stats.linear_pkt_tx_cnt++;
+ }
+
pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
@@ -442,8 +457,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
}
}
- WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
-
if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
pkg.wqe_req.sgl = pkg.sgl_array;
} else {
@@ -518,6 +531,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
return NETDEV_TX_OK;
}
+static netdev_features_t mana_features_check(struct sk_buff *skb,
+ struct net_device *ndev,
+ netdev_features_t features)
+{
+ if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
+ skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+ /* Exceeds HW SGE limit.
+ * GSO case:
+ * Disable GSO so the stack will software-segment the skb
+ * into smaller skbs that fit the SGE budget.
+ * Non-GSO case:
+ * The xmit path will attempt skb_linearize() as a fallback.
+ */
+ if (skb_is_gso(skb))
+ features &= ~NETIF_F_GSO_MASK;
+ }
+ return features;
+}
+
static void mana_get_stats64(struct net_device *ndev,
struct rtnl_link_stats64 *st)
{
@@ -878,6 +910,7 @@ static const struct net_device_ops mana_devops = {
.ndo_open = mana_open,
.ndo_stop = mana_close,
.ndo_select_queue = mana_select_queue,
+ .ndo_features_check = mana_features_check,
.ndo_start_xmit = mana_start_xmit,
.ndo_validate_addr = eth_validate_addr,
.ndo_get_stats64 = mana_get_stats64,
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index a1afa75a9463..fa5e1a2f06a9 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -71,6 +71,8 @@ static const struct mana_stats_desc mana_eth_stats[] = {
{"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)},
{"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
tx_cqe_unknown_type)},
+ {"linear_pkt_tx_cnt", offsetof(struct mana_ethtool_stats,
+ linear_pkt_tx_cnt)},
{"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
rx_coalesced_err)},
{"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 637f42485dba..84614ebe0f4c 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -592,6 +592,9 @@ enum {
#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
#define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
+/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
+#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
+
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
@@ -601,7 +604,8 @@ enum {
GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
- GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE)
+ GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
+ GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE)
#define GDMA_DRV_CAP_FLAGS2 0
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 8906901535f5..50a532fb30d6 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -404,6 +404,7 @@ struct mana_ethtool_stats {
u64 hc_tx_err_gdma;
u64 tx_cqe_err;
u64 tx_cqe_unknown_type;
+ u64 linear_pkt_tx_cnt;
u64 rx_coalesced_err;
u64 rx_cqe_unknown_type;
};
--
2.43.0
next prev parent reply other threads:[~2025-11-11 8:14 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-11 8:12 [PATCH net-next v3 0/2] net: mana: Enforce TX SGE limit and fix error cleanup Aditya Garg
2025-11-11 8:13 ` Aditya Garg [this message]
2025-11-11 8:13 ` [PATCH net-next v3 2/2] net: mana: Drop TX skb on post_work_request failure and unmap resources Aditya Garg
2025-11-12 1:08 ` Jakub Kicinski
2025-11-12 12:12 ` Aditya Garg
2025-11-12 2:04 ` kernel test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1762848781-357-2-git-send-email-gargaditya@linux.microsoft.com \
--to=gargaditya@linux.microsoft.com \
--cc=andrew+netdev@lunn.ch \
--cc=davem@davemloft.net \
--cc=decui@microsoft.com \
--cc=dipayanroy@linux.microsoft.com \
--cc=edumazet@google.com \
--cc=ernis@linux.microsoft.com \
--cc=gargaditya@microsoft.com \
--cc=haiyangz@microsoft.com \
--cc=horms@kernel.org \
--cc=kotaranov@microsoft.com \
--cc=kuba@kernel.org \
--cc=kys@microsoft.com \
--cc=leon@kernel.org \
--cc=linux-hyperv@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=longli@microsoft.com \
--cc=mlevitsk@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=shirazsaleem@microsoft.com \
--cc=shradhagupta@linux.microsoft.com \
--cc=ssengar@linux.microsoft.com \
--cc=wei.liu@kernel.org \
--cc=yury.norov@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.