netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next v4 0/2] net: mana: Enforce TX SGE limit and fix error cleanup
@ 2025-11-12 13:01 Aditya Garg
  2025-11-12 13:01 ` [PATCH net-next v4 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit Aditya Garg
  2025-11-12 13:01 ` [PATCH net-next v4 2/2] net: mana: Drop TX skb on post_work_request failure and unmap resources Aditya Garg
  0 siblings, 2 replies; 5+ messages in thread
From: Aditya Garg @ 2025-11-12 13:01 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, edumazet,
	kuba, pabeni, longli, kotaranov, horms, shradhagupta, ssengar,
	ernis, dipayanroy, shirazsaleem, leon, mlevitsk, yury.norov,
	sbhatta, linux-hyperv, netdev, linux-kernel, linux-rdma,
	gargaditya
  Cc: Aditya Garg

Add pre-transmission checks to block SKBs that exceed the hardware's SGE 
limit. Force software segmentation for GSO traffic and linearize non-GSO 
packets as needed.

Update TX error handling to drop failed SKBs and unmap resources 
immediately.

---
Changes in v4:
* Fix warning during build reported by kernel test robot
---
Aditya Garg (2):
  net: mana: Handle SKB if TX SGEs exceed hardware limit
  net: mana: Drop TX skb on post_work_request failure and unmap
    resources

 .../net/ethernet/microsoft/mana/gdma_main.c   |  6 +--
 drivers/net/ethernet/microsoft/mana/mana_en.c | 44 ++++++++++++++++---
 .../ethernet/microsoft/mana/mana_ethtool.c    |  2 +
 include/net/mana/gdma.h                       |  6 ++-
 include/net/mana/mana.h                       |  2 +
 5 files changed, 48 insertions(+), 12 deletions(-)

-- 
2.43.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH net-next v4 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit
  2025-11-12 13:01 [PATCH net-next v4 0/2] net: mana: Enforce TX SGE limit and fix error cleanup Aditya Garg
@ 2025-11-12 13:01 ` Aditya Garg
  2025-11-12 13:55   ` Eric Dumazet
  2025-11-12 13:01 ` [PATCH net-next v4 2/2] net: mana: Drop TX skb on post_work_request failure and unmap resources Aditya Garg
  1 sibling, 1 reply; 5+ messages in thread
From: Aditya Garg @ 2025-11-12 13:01 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, edumazet,
	kuba, pabeni, longli, kotaranov, horms, shradhagupta, ssengar,
	ernis, dipayanroy, shirazsaleem, leon, mlevitsk, yury.norov,
	sbhatta, linux-hyperv, netdev, linux-kernel, linux-rdma,
	gargaditya
  Cc: Aditya Garg

The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
per TX WQE. Exceeding this limit can cause TX failures.
Add ndo_features_check() callback to validate SKB layout before
transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
NETIF_F_GSO_MASK to enforce software segmentation in the stack.
Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
exceed the SGE limit.

Also, add an ethtool counter for linearized SKBs.

Co-developed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 37 ++++++++++++++++++-
 .../ethernet/microsoft/mana/mana_ethtool.c    |  2 +
 include/net/mana/gdma.h                       |  6 ++-
 include/net/mana/mana.h                       |  1 +
 4 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index cccd5b63cee6..67ae5421f9ee 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -11,6 +11,7 @@
 #include <linux/mm.h>
 #include <linux/pci.h>
 #include <linux/export.h>
+#include <linux/skbuff.h>
 
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
@@ -329,6 +330,20 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	cq = &apc->tx_qp[txq_idx].tx_cq;
 	tx_stats = &txq->stats;
 
+	if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
+	    skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+		/* GSO skb with Hardware SGE limit exceeded is not expected here
+		 * as they are handled in mana_features_check() callback
+		 */
+		if (skb_linearize(skb)) {
+			netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
+					 skb_shinfo(skb)->nr_frags,
+					 skb_is_gso(skb));
+			goto tx_drop_count;
+		}
+		apc->eth_stats.linear_pkt_tx_cnt++;
+	}
+
 	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
 	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
 
@@ -442,8 +457,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		}
 	}
 
-	WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
-
 	if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
 		pkg.wqe_req.sgl = pkg.sgl_array;
 	} else {
@@ -518,6 +531,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	return NETDEV_TX_OK;
 }
 
+static netdev_features_t mana_features_check(struct sk_buff *skb,
+					     struct net_device *ndev,
+					     netdev_features_t features)
+{
+	if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
+	    skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+		/* Exceeds HW SGE limit.
+		 * GSO case:
+		 *   Disable GSO so the stack will software-segment the skb
+		 *   into smaller skbs that fit the SGE budget.
+		 * Non-GSO case:
+		 *   The xmit path will attempt skb_linearize() as a fallback.
+		 */
+		if (skb_is_gso(skb))
+			features &= ~NETIF_F_GSO_MASK;
+	}
+	return features;
+}
+
 static void mana_get_stats64(struct net_device *ndev,
 			     struct rtnl_link_stats64 *st)
 {
@@ -878,6 +910,7 @@ static const struct net_device_ops mana_devops = {
 	.ndo_open		= mana_open,
 	.ndo_stop		= mana_close,
 	.ndo_select_queue	= mana_select_queue,
+	.ndo_features_check	= mana_features_check,
 	.ndo_start_xmit		= mana_start_xmit,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_get_stats64	= mana_get_stats64,
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index a1afa75a9463..fa5e1a2f06a9 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -71,6 +71,8 @@ static const struct mana_stats_desc mana_eth_stats[] = {
 	{"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)},
 	{"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
 					tx_cqe_unknown_type)},
+	{"linear_pkt_tx_cnt", offsetof(struct mana_ethtool_stats,
+					linear_pkt_tx_cnt)},
 	{"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
 					rx_coalesced_err)},
 	{"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 637f42485dba..84614ebe0f4c 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -592,6 +592,9 @@ enum {
 #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
 #define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
 
+/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
+#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
+
 #define GDMA_DRV_CAP_FLAGS1 \
 	(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
 	 GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
@@ -601,7 +604,8 @@ enum {
 	 GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
 	 GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
 	 GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
-	 GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE)
+	 GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
+	 GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE)
 
 #define GDMA_DRV_CAP_FLAGS2 0
 
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 8906901535f5..50a532fb30d6 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -404,6 +404,7 @@ struct mana_ethtool_stats {
 	u64 hc_tx_err_gdma;
 	u64 tx_cqe_err;
 	u64 tx_cqe_unknown_type;
+	u64 linear_pkt_tx_cnt;
 	u64 rx_coalesced_err;
 	u64 rx_cqe_unknown_type;
 };
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH net-next v4 2/2] net: mana: Drop TX skb on post_work_request failure and unmap resources
  2025-11-12 13:01 [PATCH net-next v4 0/2] net: mana: Enforce TX SGE limit and fix error cleanup Aditya Garg
  2025-11-12 13:01 ` [PATCH net-next v4 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit Aditya Garg
@ 2025-11-12 13:01 ` Aditya Garg
  1 sibling, 0 replies; 5+ messages in thread
From: Aditya Garg @ 2025-11-12 13:01 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, edumazet,
	kuba, pabeni, longli, kotaranov, horms, shradhagupta, ssengar,
	ernis, dipayanroy, shirazsaleem, leon, mlevitsk, yury.norov,
	sbhatta, linux-hyperv, netdev, linux-kernel, linux-rdma,
	gargaditya
  Cc: Aditya Garg

Drop TX packets when posting the work request fails and ensure DMA
mappings are always cleaned up.

Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
---
Changes in v4:
* Fix warning during build reported by kernel test robot
---
 drivers/net/ethernet/microsoft/mana/gdma_main.c | 6 +-----
 drivers/net/ethernet/microsoft/mana/mana_en.c   | 7 +++----
 include/net/mana/mana.h                         | 1 +
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index effe0a2f207a..8fd70b34807a 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -1300,7 +1300,6 @@ int mana_gd_post_work_request(struct gdma_queue *wq,
 			      struct gdma_posted_wqe_info *wqe_info)
 {
 	u32 client_oob_size = wqe_req->inline_oob_size;
-	struct gdma_context *gc;
 	u32 sgl_data_size;
 	u32 max_wqe_size;
 	u32 wqe_size;
@@ -1330,11 +1329,8 @@ int mana_gd_post_work_request(struct gdma_queue *wq,
 	if (wqe_size > max_wqe_size)
 		return -EINVAL;
 
-	if (wq->monitor_avl_buf && wqe_size > mana_gd_wq_avail_space(wq)) {
-		gc = wq->gdma_dev->gdma_context;
-		dev_err(gc->dev, "unsuccessful flow control!\n");
+	if (wq->monitor_avl_buf && wqe_size > mana_gd_wq_avail_space(wq))
 		return -ENOSPC;
-	}
 
 	if (wqe_info)
 		wqe_info->wqe_size_in_bu = wqe_size / GDMA_WQE_BU_SIZE;
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 67ae5421f9ee..066d822f68f0 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -491,9 +491,9 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 
 	if (err) {
 		(void)skb_dequeue_tail(&txq->pending_skbs);
+		mana_unmap_skb(skb, apc);
 		netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
-		err = NETDEV_TX_BUSY;
-		goto tx_busy;
+		goto free_sgl_ptr;
 	}
 
 	err = NETDEV_TX_OK;
@@ -513,7 +513,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	tx_stats->bytes += len + ((num_gso_seg - 1) * gso_hs);
 	u64_stats_update_end(&tx_stats->syncp);
 
-tx_busy:
 	if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) {
 		netif_tx_wake_queue(net_txq);
 		apc->eth_stats.wake_queue++;
@@ -1679,7 +1678,7 @@ static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
 	return 0;
 }
 
-static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
 {
 	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
 	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 50a532fb30d6..d05457d3e1ab 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -585,6 +585,7 @@ int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed,
 void mana_query_phy_stats(struct mana_port_context *apc);
 int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues);
 void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc);
 
 extern const struct ethtool_ops mana_ethtool_ops;
 extern struct dentry *mana_debugfs_root;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next v4 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit
  2025-11-12 13:01 ` [PATCH net-next v4 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit Aditya Garg
@ 2025-11-12 13:55   ` Eric Dumazet
  2025-11-14 20:58     ` Aditya Garg
  0 siblings, 1 reply; 5+ messages in thread
From: Eric Dumazet @ 2025-11-12 13:55 UTC (permalink / raw)
  To: Aditya Garg
  Cc: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, kuba, pabeni,
	longli, kotaranov, horms, shradhagupta, ssengar, ernis,
	dipayanroy, shirazsaleem, leon, mlevitsk, yury.norov, sbhatta,
	linux-hyperv, netdev, linux-kernel, linux-rdma, gargaditya

On Wed, Nov 12, 2025 at 5:11 AM Aditya Garg
<gargaditya@linux.microsoft.com> wrote:
>
> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
> per TX WQE. Exceeding this limit can cause TX failures.
> Add ndo_features_check() callback to validate SKB layout before
> transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
> NETIF_F_GSO_MASK to enforce software segmentation in the stack.
> Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
> exceed the SGE limit.
>
> Also, Add ethtool counter for SKBs linearized
>
> Co-developed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
> ---
>  drivers/net/ethernet/microsoft/mana/mana_en.c | 37 ++++++++++++++++++-
>  .../ethernet/microsoft/mana/mana_ethtool.c    |  2 +
>  include/net/mana/gdma.h                       |  6 ++-
>  include/net/mana/mana.h                       |  1 +
>  4 files changed, 43 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index cccd5b63cee6..67ae5421f9ee 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -11,6 +11,7 @@
>  #include <linux/mm.h>
>  #include <linux/pci.h>
>  #include <linux/export.h>
> +#include <linux/skbuff.h>
>
>  #include <net/checksum.h>
>  #include <net/ip6_checksum.h>
> @@ -329,6 +330,20 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>         cq = &apc->tx_qp[txq_idx].tx_cq;
>         tx_stats = &txq->stats;
>
> +       if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
> +           skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
> +               /* GSO skb with Hardware SGE limit exceeded is not expected here
> +                * as they are handled in mana_features_check() callback
> +                */
> +               if (skb_linearize(skb)) {
> +                       netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
> +                                        skb_shinfo(skb)->nr_frags,
> +                                        skb_is_gso(skb));
> +                       goto tx_drop_count;
> +               }
> +               apc->eth_stats.linear_pkt_tx_cnt++;
> +       }
> +
>         pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
>         pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
>
> @@ -442,8 +457,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>                 }
>         }
>
> -       WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
> -
>         if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
>                 pkg.wqe_req.sgl = pkg.sgl_array;
>         } else {
> @@ -518,6 +531,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>         return NETDEV_TX_OK;
>  }
>


#if MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES

> +static netdev_features_t mana_features_check(struct sk_buff *skb,
> +                                            struct net_device *ndev,
> +                                            netdev_features_t features)
> +{
> +       if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
> +           skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
> +               /* Exceeds HW SGE limit.
> +                * GSO case:
> +                *   Disable GSO so the stack will software-segment the skb
> +                *   into smaller skbs that fit the SGE budget.
> +                * Non-GSO case:
> +                *   The xmit path will attempt skb_linearize() as a fallback.
> +                */
> +               if (skb_is_gso(skb))

No need to test skb_is_gso(skb): you can clear the bits unconditionally;
this will be a NOP if the packet is non-GSO anyway.

> +                       features &= ~NETIF_F_GSO_MASK;
> +       }
> +       return features;
> +}

#endif

> +
>  static void mana_get_stats64(struct net_device *ndev,
>                              struct rtnl_link_stats64 *st)
>  {
> @@ -878,6 +910,7 @@ static const struct net_device_ops mana_devops = {
>         .ndo_open               = mana_open,
>         .ndo_stop               = mana_close,
>         .ndo_select_queue       = mana_select_queue,
> +       .ndo_features_check     = mana_features_check,

Note that since your mana_features_check() is a nop when MAX_SKB_FRAGS is
small enough, you could set a non-NULL .ndo_features_check only under a
preprocessor condition:

#if MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES
    .ndo_features_check = ....
#endif

This would avoid an expensive indirect call when possible.


>         .ndo_start_xmit         = mana_start_xmit,
>         .ndo_validate_addr      = eth_validate_addr,
>         .ndo_get_stats64        = mana_get_stats64,
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
> index a1afa75a9463..fa5e1a2f06a9 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
> @@ -71,6 +71,8 @@ static const struct mana_stats_desc mana_eth_stats[] = {
>         {"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)},
>         {"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
>                                         tx_cqe_unknown_type)},
> +       {"linear_pkt_tx_cnt", offsetof(struct mana_ethtool_stats,
> +                                       linear_pkt_tx_cnt)},
>         {"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
>                                         rx_coalesced_err)},
>         {"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
> diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
> index 637f42485dba..84614ebe0f4c 100644
> --- a/include/net/mana/gdma.h
> +++ b/include/net/mana/gdma.h
> @@ -592,6 +592,9 @@ enum {
>  #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
>  #define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
>
> +/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
> +#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
> +
>  #define GDMA_DRV_CAP_FLAGS1 \
>         (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
>          GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
> @@ -601,7 +604,8 @@ enum {
>          GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
>          GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
>          GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
> -        GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE)
> +        GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
> +        GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE)
>
>  #define GDMA_DRV_CAP_FLAGS2 0
>
> diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
> index 8906901535f5..50a532fb30d6 100644
> --- a/include/net/mana/mana.h
> +++ b/include/net/mana/mana.h
> @@ -404,6 +404,7 @@ struct mana_ethtool_stats {
>         u64 hc_tx_err_gdma;
>         u64 tx_cqe_err;
>         u64 tx_cqe_unknown_type;
> +       u64 linear_pkt_tx_cnt;
>         u64 rx_coalesced_err;
>         u64 rx_cqe_unknown_type;
>  };
> --
> 2.43.0
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next v4 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit
  2025-11-12 13:55   ` Eric Dumazet
@ 2025-11-14 20:58     ` Aditya Garg
  0 siblings, 0 replies; 5+ messages in thread
From: Aditya Garg @ 2025-11-14 20:58 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, kuba, pabeni,
	longli, kotaranov, horms, shradhagupta, ssengar, ernis,
	dipayanroy, shirazsaleem, leon, mlevitsk, yury.norov, sbhatta,
	linux-hyperv, netdev, linux-kernel, linux-rdma, gargaditya

On 12-11-2025 19:25, Eric Dumazet wrote:
> On Wed, Nov 12, 2025 at 5:11 AM Aditya Garg
> <gargaditya@linux.microsoft.com> wrote:
>>
>> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
>> per TX WQE. Exceeding this limit can cause TX failures.
>> Add ndo_features_check() callback to validate SKB layout before
>> transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
>> NETIF_F_GSO_MASK to enforce software segmentation in the stack.
>> Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
>> exceed the SGE limit.
>>
>> Also, Add ethtool counter for SKBs linearized
>>
>> Co-developed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>> Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
>> ---
>>   drivers/net/ethernet/microsoft/mana/mana_en.c | 37 ++++++++++++++++++-
>>   .../ethernet/microsoft/mana/mana_ethtool.c    |  2 +
>>   include/net/mana/gdma.h                       |  6 ++-
>>   include/net/mana/mana.h                       |  1 +
>>   4 files changed, 43 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
>> index cccd5b63cee6..67ae5421f9ee 100644
>> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
>> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
>> @@ -11,6 +11,7 @@
>>   #include <linux/mm.h>
>>   #include <linux/pci.h>
>>   #include <linux/export.h>
>> +#include <linux/skbuff.h>
>>
>>   #include <net/checksum.h>
>>   #include <net/ip6_checksum.h>
>> @@ -329,6 +330,20 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>>          cq = &apc->tx_qp[txq_idx].tx_cq;
>>          tx_stats = &txq->stats;
>>
>> +       if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
>> +           skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
>> +               /* GSO skb with Hardware SGE limit exceeded is not expected here
>> +                * as they are handled in mana_features_check() callback
>> +                */
>> +               if (skb_linearize(skb)) {
>> +                       netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
>> +                                        skb_shinfo(skb)->nr_frags,
>> +                                        skb_is_gso(skb));
>> +                       goto tx_drop_count;
>> +               }
>> +               apc->eth_stats.linear_pkt_tx_cnt++;
>> +       }
>> +
>>          pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
>>          pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
>>
>> @@ -442,8 +457,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>>                  }
>>          }
>>
>> -       WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
>> -
>>          if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
>>                  pkg.wqe_req.sgl = pkg.sgl_array;
>>          } else {
>> @@ -518,6 +531,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>>          return NETDEV_TX_OK;
>>   }
>>
> 
> 
> #if MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES
> 
>> +static netdev_features_t mana_features_check(struct sk_buff *skb,
>> +                                            struct net_device *ndev,
>> +                                            netdev_features_t features)
>> +{
>> +       if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
>> +           skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
>> +               /* Exceeds HW SGE limit.
>> +                * GSO case:
>> +                *   Disable GSO so the stack will software-segment the skb
>> +                *   into smaller skbs that fit the SGE budget.
>> +                * Non-GSO case:
>> +                *   The xmit path will attempt skb_linearize() as a fallback.
>> +                */
>> +               if (skb_is_gso(skb))
> 
> No need to test skb_is_gso(skb), you can clear bits, this will be a
> NOP if the packet is non GSO anyway.
> 
>> +                       features &= ~NETIF_F_GSO_MASK;
>> +       }
>> +       return features;
>> +}
> 
> #endif
> 
>> +
>>   static void mana_get_stats64(struct net_device *ndev,
>>                               struct rtnl_link_stats64 *st)
>>   {
>> @@ -878,6 +910,7 @@ static const struct net_device_ops mana_devops = {
>>          .ndo_open               = mana_open,
>>          .ndo_stop               = mana_close,
>>          .ndo_select_queue       = mana_select_queue,
>> +       .ndo_features_check     = mana_features_check,
> 
> Note that if your mana_features_check() is a nop if MAX_SKB_FRAGS is
> small enough,
> you could set a non NULL .ndo_features_check based on a preprocessor condition
> 
> #if MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES
>      .ndo_features_check = ....
> #endif
> 
> This would avoid an expensive indirect call when possible.
> 
> 
>>          .ndo_start_xmit         = mana_start_xmit,
>>          .ndo_validate_addr      = eth_validate_addr,
>>          .ndo_get_stats64        = mana_get_stats64,
>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
>> index a1afa75a9463..fa5e1a2f06a9 100644
>> --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
>> +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
>> @@ -71,6 +71,8 @@ static const struct mana_stats_desc mana_eth_stats[] = {
>>          {"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)},
>>          {"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
>>                                          tx_cqe_unknown_type)},
>> +       {"linear_pkt_tx_cnt", offsetof(struct mana_ethtool_stats,
>> +                                       linear_pkt_tx_cnt)},
>>          {"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
>>                                          rx_coalesced_err)},
>>          {"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
>> diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
>> index 637f42485dba..84614ebe0f4c 100644
>> --- a/include/net/mana/gdma.h
>> +++ b/include/net/mana/gdma.h
>> @@ -592,6 +592,9 @@ enum {
>>   #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
>>   #define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
>>
>> +/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
>> +#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
>> +
>>   #define GDMA_DRV_CAP_FLAGS1 \
>>          (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
>>           GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
>> @@ -601,7 +604,8 @@ enum {
>>           GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
>>           GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
>>           GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
>> -        GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE)
>> +        GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
>> +        GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE)
>>
>>   #define GDMA_DRV_CAP_FLAGS2 0
>>
>> diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
>> index 8906901535f5..50a532fb30d6 100644
>> --- a/include/net/mana/mana.h
>> +++ b/include/net/mana/mana.h
>> @@ -404,6 +404,7 @@ struct mana_ethtool_stats {
>>          u64 hc_tx_err_gdma;
>>          u64 tx_cqe_err;
>>          u64 tx_cqe_unknown_type;
>> +       u64 linear_pkt_tx_cnt;
>>          u64 rx_coalesced_err;
>>          u64 rx_cqe_unknown_type;
>>   };
>> --
>> 2.43.0
>>

Thanks for the review, Eric. I will incorporate these changes in the next
revision.

Regards,
Aditya

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2025-11-14 20:58 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-12 13:01 [PATCH net-next v4 0/2] net: mana: Enforce TX SGE limit and fix error cleanup Aditya Garg
2025-11-12 13:01 ` [PATCH net-next v4 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit Aditya Garg
2025-11-12 13:55   ` Eric Dumazet
2025-11-14 20:58     ` Aditya Garg
2025-11-12 13:01 ` [PATCH net-next v4 2/2] net: mana: Drop TX skb on post_work_request failure and unmap resources Aditya Garg

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).