* [PATCH net-next v4 0/2] net: mana: Enforce TX SGE limit and fix error cleanup
@ 2025-11-12 13:01 Aditya Garg
2025-11-12 13:01 ` [PATCH net-next v4 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit Aditya Garg
2025-11-12 13:01 ` [PATCH net-next v4 2/2] net: mana: Drop TX skb on post_work_request failure and unmap resources Aditya Garg
0 siblings, 2 replies; 5+ messages in thread
From: Aditya Garg @ 2025-11-12 13:01 UTC (permalink / raw)
To: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, edumazet,
kuba, pabeni, longli, kotaranov, horms, shradhagupta, ssengar,
ernis, dipayanroy, shirazsaleem, leon, mlevitsk, yury.norov,
sbhatta, linux-hyperv, netdev, linux-kernel, linux-rdma,
gargaditya
Cc: Aditya Garg
Add pre-transmission checks to block SKBs that exceed the hardware's SGE
limit. Force software segmentation for GSO traffic and linearize non-GSO
packets as needed.
Update TX error handling to drop failed SKBs and unmap resources
immediately.
---
Changes in v4:
* Fix warning during build reported by kernel test robot
---
Aditya Garg (2):
net: mana: Handle SKB if TX SGEs exceed hardware limit
net: mana: Drop TX skb on post_work_request failure and unmap
resources
.../net/ethernet/microsoft/mana/gdma_main.c | 6 +--
drivers/net/ethernet/microsoft/mana/mana_en.c | 44 ++++++++++++++++---
.../ethernet/microsoft/mana/mana_ethtool.c | 2 +
include/net/mana/gdma.h | 6 ++-
include/net/mana/mana.h | 2 +
5 files changed, 48 insertions(+), 12 deletions(-)
--
2.43.0
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH net-next v4 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit
2025-11-12 13:01 [PATCH net-next v4 0/2] net: mana: Enforce TX SGE limit and fix error cleanup Aditya Garg
@ 2025-11-12 13:01 ` Aditya Garg
2025-11-12 13:55 ` Eric Dumazet
2025-11-12 13:01 ` [PATCH net-next v4 2/2] net: mana: Drop TX skb on post_work_request failure and unmap resources Aditya Garg
1 sibling, 1 reply; 5+ messages in thread
From: Aditya Garg @ 2025-11-12 13:01 UTC (permalink / raw)
To: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, edumazet,
kuba, pabeni, longli, kotaranov, horms, shradhagupta, ssengar,
ernis, dipayanroy, shirazsaleem, leon, mlevitsk, yury.norov,
sbhatta, linux-hyperv, netdev, linux-kernel, linux-rdma,
gargaditya
Cc: Aditya Garg
The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
per TX WQE. Exceeding this limit can cause TX failures.
Add ndo_features_check() callback to validate SKB layout before
transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
NETIF_F_GSO_MASK to enforce software segmentation in the stack.
Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
exceed the SGE limit.
Also, add an ethtool counter for linearized SKBs.
Co-developed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 37 ++++++++++++++++++-
.../ethernet/microsoft/mana/mana_ethtool.c | 2 +
include/net/mana/gdma.h | 6 ++-
include/net/mana/mana.h | 1 +
4 files changed, 43 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index cccd5b63cee6..67ae5421f9ee 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/pci.h>
#include <linux/export.h>
+#include <linux/skbuff.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
@@ -329,6 +330,20 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
cq = &apc->tx_qp[txq_idx].tx_cq;
tx_stats = &txq->stats;
+ if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
+ skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+ /* GSO skb with Hardware SGE limit exceeded is not expected here
+ * as they are handled in mana_features_check() callback
+ */
+ if (skb_linearize(skb)) {
+ netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
+ skb_shinfo(skb)->nr_frags,
+ skb_is_gso(skb));
+ goto tx_drop_count;
+ }
+ apc->eth_stats.linear_pkt_tx_cnt++;
+ }
+
pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
@@ -442,8 +457,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
}
}
- WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
-
if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
pkg.wqe_req.sgl = pkg.sgl_array;
} else {
@@ -518,6 +531,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
return NETDEV_TX_OK;
}
+static netdev_features_t mana_features_check(struct sk_buff *skb,
+ struct net_device *ndev,
+ netdev_features_t features)
+{
+ if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
+ skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+ /* Exceeds HW SGE limit.
+ * GSO case:
+ * Disable GSO so the stack will software-segment the skb
+ * into smaller skbs that fit the SGE budget.
+ * Non-GSO case:
+ * The xmit path will attempt skb_linearize() as a fallback.
+ */
+ if (skb_is_gso(skb))
+ features &= ~NETIF_F_GSO_MASK;
+ }
+ return features;
+}
+
static void mana_get_stats64(struct net_device *ndev,
struct rtnl_link_stats64 *st)
{
@@ -878,6 +910,7 @@ static const struct net_device_ops mana_devops = {
.ndo_open = mana_open,
.ndo_stop = mana_close,
.ndo_select_queue = mana_select_queue,
+ .ndo_features_check = mana_features_check,
.ndo_start_xmit = mana_start_xmit,
.ndo_validate_addr = eth_validate_addr,
.ndo_get_stats64 = mana_get_stats64,
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index a1afa75a9463..fa5e1a2f06a9 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -71,6 +71,8 @@ static const struct mana_stats_desc mana_eth_stats[] = {
{"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)},
{"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
tx_cqe_unknown_type)},
+ {"linear_pkt_tx_cnt", offsetof(struct mana_ethtool_stats,
+ linear_pkt_tx_cnt)},
{"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
rx_coalesced_err)},
{"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 637f42485dba..84614ebe0f4c 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -592,6 +592,9 @@ enum {
#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
#define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
+/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
+#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
+
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
@@ -601,7 +604,8 @@ enum {
GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
- GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE)
+ GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
+ GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE)
#define GDMA_DRV_CAP_FLAGS2 0
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 8906901535f5..50a532fb30d6 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -404,6 +404,7 @@ struct mana_ethtool_stats {
u64 hc_tx_err_gdma;
u64 tx_cqe_err;
u64 tx_cqe_unknown_type;
+ u64 linear_pkt_tx_cnt;
u64 rx_coalesced_err;
u64 rx_cqe_unknown_type;
};
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH net-next v4 2/2] net: mana: Drop TX skb on post_work_request failure and unmap resources
2025-11-12 13:01 [PATCH net-next v4 0/2] net: mana: Enforce TX SGE limit and fix error cleanup Aditya Garg
2025-11-12 13:01 ` [PATCH net-next v4 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit Aditya Garg
@ 2025-11-12 13:01 ` Aditya Garg
1 sibling, 0 replies; 5+ messages in thread
From: Aditya Garg @ 2025-11-12 13:01 UTC (permalink / raw)
To: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, edumazet,
kuba, pabeni, longli, kotaranov, horms, shradhagupta, ssengar,
ernis, dipayanroy, shirazsaleem, leon, mlevitsk, yury.norov,
sbhatta, linux-hyperv, netdev, linux-kernel, linux-rdma,
gargaditya
Cc: Aditya Garg
Drop TX packets when posting the work request fails and ensure DMA
mappings are always cleaned up.
Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
---
Changes in v4:
* Fix warning during build reported by kernel test robot
---
drivers/net/ethernet/microsoft/mana/gdma_main.c | 6 +-----
drivers/net/ethernet/microsoft/mana/mana_en.c | 7 +++----
include/net/mana/mana.h | 1 +
3 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index effe0a2f207a..8fd70b34807a 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -1300,7 +1300,6 @@ int mana_gd_post_work_request(struct gdma_queue *wq,
struct gdma_posted_wqe_info *wqe_info)
{
u32 client_oob_size = wqe_req->inline_oob_size;
- struct gdma_context *gc;
u32 sgl_data_size;
u32 max_wqe_size;
u32 wqe_size;
@@ -1330,11 +1329,8 @@ int mana_gd_post_work_request(struct gdma_queue *wq,
if (wqe_size > max_wqe_size)
return -EINVAL;
- if (wq->monitor_avl_buf && wqe_size > mana_gd_wq_avail_space(wq)) {
- gc = wq->gdma_dev->gdma_context;
- dev_err(gc->dev, "unsuccessful flow control!\n");
+ if (wq->monitor_avl_buf && wqe_size > mana_gd_wq_avail_space(wq))
return -ENOSPC;
- }
if (wqe_info)
wqe_info->wqe_size_in_bu = wqe_size / GDMA_WQE_BU_SIZE;
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 67ae5421f9ee..066d822f68f0 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -491,9 +491,9 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
if (err) {
(void)skb_dequeue_tail(&txq->pending_skbs);
+ mana_unmap_skb(skb, apc);
netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
- err = NETDEV_TX_BUSY;
- goto tx_busy;
+ goto free_sgl_ptr;
}
err = NETDEV_TX_OK;
@@ -513,7 +513,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
tx_stats->bytes += len + ((num_gso_seg - 1) * gso_hs);
u64_stats_update_end(&tx_stats->syncp);
-tx_busy:
if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) {
netif_tx_wake_queue(net_txq);
apc->eth_stats.wake_queue++;
@@ -1679,7 +1678,7 @@ static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
return 0;
}
-static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
{
struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 50a532fb30d6..d05457d3e1ab 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -585,6 +585,7 @@ int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed,
void mana_query_phy_stats(struct mana_port_context *apc);
int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues);
void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc);
extern const struct ethtool_ops mana_ethtool_ops;
extern struct dentry *mana_debugfs_root;
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH net-next v4 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit
2025-11-12 13:01 ` [PATCH net-next v4 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit Aditya Garg
@ 2025-11-12 13:55 ` Eric Dumazet
2025-11-14 20:58 ` Aditya Garg
0 siblings, 1 reply; 5+ messages in thread
From: Eric Dumazet @ 2025-11-12 13:55 UTC (permalink / raw)
To: Aditya Garg
Cc: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, kuba, pabeni,
longli, kotaranov, horms, shradhagupta, ssengar, ernis,
dipayanroy, shirazsaleem, leon, mlevitsk, yury.norov, sbhatta,
linux-hyperv, netdev, linux-kernel, linux-rdma, gargaditya
On Wed, Nov 12, 2025 at 5:11 AM Aditya Garg
<gargaditya@linux.microsoft.com> wrote:
>
> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
> per TX WQE. Exceeding this limit can cause TX failures.
> Add ndo_features_check() callback to validate SKB layout before
> transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
> NETIF_F_GSO_MASK to enforce software segmentation in the stack.
> Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
> exceed the SGE limit.
>
> Also, Add ethtool counter for SKBs linearized
>
> Co-developed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
> ---
> drivers/net/ethernet/microsoft/mana/mana_en.c | 37 ++++++++++++++++++-
> .../ethernet/microsoft/mana/mana_ethtool.c | 2 +
> include/net/mana/gdma.h | 6 ++-
> include/net/mana/mana.h | 1 +
> 4 files changed, 43 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index cccd5b63cee6..67ae5421f9ee 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -11,6 +11,7 @@
> #include <linux/mm.h>
> #include <linux/pci.h>
> #include <linux/export.h>
> +#include <linux/skbuff.h>
>
> #include <net/checksum.h>
> #include <net/ip6_checksum.h>
> @@ -329,6 +330,20 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
> cq = &apc->tx_qp[txq_idx].tx_cq;
> tx_stats = &txq->stats;
>
> + if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
> + skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
> + /* GSO skb with Hardware SGE limit exceeded is not expected here
> + * as they are handled in mana_features_check() callback
> + */
> + if (skb_linearize(skb)) {
> + netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
> + skb_shinfo(skb)->nr_frags,
> + skb_is_gso(skb));
> + goto tx_drop_count;
> + }
> + apc->eth_stats.linear_pkt_tx_cnt++;
> + }
> +
> pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
> pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
>
> @@ -442,8 +457,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
> }
> }
>
> - WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
> -
> if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
> pkg.wqe_req.sgl = pkg.sgl_array;
> } else {
> @@ -518,6 +531,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
> return NETDEV_TX_OK;
> }
>
#if MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES
> +static netdev_features_t mana_features_check(struct sk_buff *skb,
> + struct net_device *ndev,
> + netdev_features_t features)
> +{
> + if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
> + skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
> + /* Exceeds HW SGE limit.
> + * GSO case:
> + * Disable GSO so the stack will software-segment the skb
> + * into smaller skbs that fit the SGE budget.
> + * Non-GSO case:
> + * The xmit path will attempt skb_linearize() as a fallback.
> + */
> + if (skb_is_gso(skb))
No need to test skb_is_gso(skb), you can clear bits, this will be a
NOP if the packet is non GSO anyway.
> + features &= ~NETIF_F_GSO_MASK;
> + }
> + return features;
> +}
#endif
> +
> static void mana_get_stats64(struct net_device *ndev,
> struct rtnl_link_stats64 *st)
> {
> @@ -878,6 +910,7 @@ static const struct net_device_ops mana_devops = {
> .ndo_open = mana_open,
> .ndo_stop = mana_close,
> .ndo_select_queue = mana_select_queue,
> + .ndo_features_check = mana_features_check,
Note that since your mana_features_check() is a NOP when MAX_SKB_FRAGS is
small enough,
you could set a non-NULL .ndo_features_check based on a preprocessor condition:
#if MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES
.ndo_features_check = ....
#endif
This would avoid an expensive indirect call when possible.
> .ndo_start_xmit = mana_start_xmit,
> .ndo_validate_addr = eth_validate_addr,
> .ndo_get_stats64 = mana_get_stats64,
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
> index a1afa75a9463..fa5e1a2f06a9 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
> @@ -71,6 +71,8 @@ static const struct mana_stats_desc mana_eth_stats[] = {
> {"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)},
> {"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
> tx_cqe_unknown_type)},
> + {"linear_pkt_tx_cnt", offsetof(struct mana_ethtool_stats,
> + linear_pkt_tx_cnt)},
> {"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
> rx_coalesced_err)},
> {"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
> diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
> index 637f42485dba..84614ebe0f4c 100644
> --- a/include/net/mana/gdma.h
> +++ b/include/net/mana/gdma.h
> @@ -592,6 +592,9 @@ enum {
> #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
> #define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
>
> +/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
> +#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
> +
> #define GDMA_DRV_CAP_FLAGS1 \
> (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
> GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
> @@ -601,7 +604,8 @@ enum {
> GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
> GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
> GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
> - GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE)
> + GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
> + GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE)
>
> #define GDMA_DRV_CAP_FLAGS2 0
>
> diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
> index 8906901535f5..50a532fb30d6 100644
> --- a/include/net/mana/mana.h
> +++ b/include/net/mana/mana.h
> @@ -404,6 +404,7 @@ struct mana_ethtool_stats {
> u64 hc_tx_err_gdma;
> u64 tx_cqe_err;
> u64 tx_cqe_unknown_type;
> + u64 linear_pkt_tx_cnt;
> u64 rx_coalesced_err;
> u64 rx_cqe_unknown_type;
> };
> --
> 2.43.0
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH net-next v4 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit
2025-11-12 13:55 ` Eric Dumazet
@ 2025-11-14 20:58 ` Aditya Garg
0 siblings, 0 replies; 5+ messages in thread
From: Aditya Garg @ 2025-11-14 20:58 UTC (permalink / raw)
To: Eric Dumazet
Cc: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, kuba, pabeni,
longli, kotaranov, horms, shradhagupta, ssengar, ernis,
dipayanroy, shirazsaleem, leon, mlevitsk, yury.norov, sbhatta,
linux-hyperv, netdev, linux-kernel, linux-rdma, gargaditya
On 12-11-2025 19:25, Eric Dumazet wrote:
> On Wed, Nov 12, 2025 at 5:11 AM Aditya Garg
> <gargaditya@linux.microsoft.com> wrote:
>>
>> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
>> per TX WQE. Exceeding this limit can cause TX failures.
>> Add ndo_features_check() callback to validate SKB layout before
>> transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
>> NETIF_F_GSO_MASK to enforce software segmentation in the stack.
>> Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
>> exceed the SGE limit.
>>
>> Also, Add ethtool counter for SKBs linearized
>>
>> Co-developed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>> Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
>> ---
>> drivers/net/ethernet/microsoft/mana/mana_en.c | 37 ++++++++++++++++++-
>> .../ethernet/microsoft/mana/mana_ethtool.c | 2 +
>> include/net/mana/gdma.h | 6 ++-
>> include/net/mana/mana.h | 1 +
>> 4 files changed, 43 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
>> index cccd5b63cee6..67ae5421f9ee 100644
>> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
>> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
>> @@ -11,6 +11,7 @@
>> #include <linux/mm.h>
>> #include <linux/pci.h>
>> #include <linux/export.h>
>> +#include <linux/skbuff.h>
>>
>> #include <net/checksum.h>
>> #include <net/ip6_checksum.h>
>> @@ -329,6 +330,20 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>> cq = &apc->tx_qp[txq_idx].tx_cq;
>> tx_stats = &txq->stats;
>>
>> + if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
>> + skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
>> + /* GSO skb with Hardware SGE limit exceeded is not expected here
>> + * as they are handled in mana_features_check() callback
>> + */
>> + if (skb_linearize(skb)) {
>> + netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
>> + skb_shinfo(skb)->nr_frags,
>> + skb_is_gso(skb));
>> + goto tx_drop_count;
>> + }
>> + apc->eth_stats.linear_pkt_tx_cnt++;
>> + }
>> +
>> pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
>> pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
>>
>> @@ -442,8 +457,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>> }
>> }
>>
>> - WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
>> -
>> if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
>> pkg.wqe_req.sgl = pkg.sgl_array;
>> } else {
>> @@ -518,6 +531,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>> return NETDEV_TX_OK;
>> }
>>
>
>
> #if MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES
>
>> +static netdev_features_t mana_features_check(struct sk_buff *skb,
>> + struct net_device *ndev,
>> + netdev_features_t features)
>> +{
>> + if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
>> + skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
>> + /* Exceeds HW SGE limit.
>> + * GSO case:
>> + * Disable GSO so the stack will software-segment the skb
>> + * into smaller skbs that fit the SGE budget.
>> + * Non-GSO case:
>> + * The xmit path will attempt skb_linearize() as a fallback.
>> + */
>> + if (skb_is_gso(skb))
>
> No need to test skb_is_gso(skb), you can clear bits, this will be a
> NOP if the packet is non GSO anyway.
>
>> + features &= ~NETIF_F_GSO_MASK;
>> + }
>> + return features;
>> +}
>
> #endif
>
>> +
>> static void mana_get_stats64(struct net_device *ndev,
>> struct rtnl_link_stats64 *st)
>> {
>> @@ -878,6 +910,7 @@ static const struct net_device_ops mana_devops = {
>> .ndo_open = mana_open,
>> .ndo_stop = mana_close,
>> .ndo_select_queue = mana_select_queue,
>> + .ndo_features_check = mana_features_check,
>
> Note that if your mana_features_check() is a nop if MAX_SKB_FRAGS is
> small enough,
> you could set a non NULL .ndo_features_check based on a preprocessor condition
>
> #if MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES
> .ndo_features_check = ....
> #endif
>
> This would avoid an expensive indirect call when possible.
>
>
>> .ndo_start_xmit = mana_start_xmit,
>> .ndo_validate_addr = eth_validate_addr,
>> .ndo_get_stats64 = mana_get_stats64,
>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
>> index a1afa75a9463..fa5e1a2f06a9 100644
>> --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
>> +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
>> @@ -71,6 +71,8 @@ static const struct mana_stats_desc mana_eth_stats[] = {
>> {"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)},
>> {"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
>> tx_cqe_unknown_type)},
>> + {"linear_pkt_tx_cnt", offsetof(struct mana_ethtool_stats,
>> + linear_pkt_tx_cnt)},
>> {"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
>> rx_coalesced_err)},
>> {"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
>> diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
>> index 637f42485dba..84614ebe0f4c 100644
>> --- a/include/net/mana/gdma.h
>> +++ b/include/net/mana/gdma.h
>> @@ -592,6 +592,9 @@ enum {
>> #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
>> #define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
>>
>> +/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
>> +#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
>> +
>> #define GDMA_DRV_CAP_FLAGS1 \
>> (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
>> GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
>> @@ -601,7 +604,8 @@ enum {
>> GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
>> GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
>> GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
>> - GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE)
>> + GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
>> + GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE)
>>
>> #define GDMA_DRV_CAP_FLAGS2 0
>>
>> diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
>> index 8906901535f5..50a532fb30d6 100644
>> --- a/include/net/mana/mana.h
>> +++ b/include/net/mana/mana.h
>> @@ -404,6 +404,7 @@ struct mana_ethtool_stats {
>> u64 hc_tx_err_gdma;
>> u64 tx_cqe_err;
>> u64 tx_cqe_unknown_type;
>> + u64 linear_pkt_tx_cnt;
>> u64 rx_coalesced_err;
>> u64 rx_cqe_unknown_type;
>> };
>> --
>> 2.43.0
>>
Thanks for the review Eric. I will incorporate these changes in next
revision.
Regards,
Aditya
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2025-11-14 20:58 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-11-12 13:01 [PATCH net-next v4 0/2] net: mana: Enforce TX SGE limit and fix error cleanup Aditya Garg
2025-11-12 13:01 ` [PATCH net-next v4 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit Aditya Garg
2025-11-12 13:55 ` Eric Dumazet
2025-11-14 20:58 ` Aditya Garg
2025-11-12 13:01 ` [PATCH net-next v4 2/2] net: mana: Drop TX skb on post_work_request failure and unmap resources Aditya Garg
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.