linux-hyperv.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next v2] net: mana: Handle SKB if TX SGEs exceed hardware limit
@ 2025-10-29 13:12 Aditya Garg
  2025-10-30  9:04 ` Simon Horman
  2025-10-31 23:26 ` Jakub Kicinski
  0 siblings, 2 replies; 10+ messages in thread
From: Aditya Garg @ 2025-10-29 13:12 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, edumazet,
	kuba, pabeni, longli, kotaranov, horms, shradhagupta, ssengar,
	ernis, dipayanroy, shirazsaleem, linux-hyperv, netdev,
	linux-kernel, linux-rdma, gargaditya

The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
per TX WQE. Exceeding this limit can cause TX failures.
Add ndo_features_check() callback to validate SKB layout before
transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
NETIF_F_GSO_MASK to enforce software segmentation in the stack.
Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
exceed the SGE limit.

Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
send other errors to free_sgl_ptr to free resources and record the tx
drop.

Co-developed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 48 +++++++++++++++++--
 include/net/mana/gdma.h                       |  6 ++-
 include/net/mana/mana.h                       |  1 +
 3 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 0142fd98392c..1f95b644eba1 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -11,6 +11,7 @@
 #include <linux/mm.h>
 #include <linux/pci.h>
 #include <linux/export.h>
+#include <linux/skbuff.h>
 
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
@@ -289,6 +290,21 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	cq = &apc->tx_qp[txq_idx].tx_cq;
 	tx_stats = &txq->stats;
 
+	if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
+	    skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+		/* GSO skb with Hardware SGE limit exceeded is not expected here
+		 * as they are handled in mana_features_check() callback
+		 */
+		if (skb_is_gso(skb))
+			netdev_warn_once(ndev, "GSO enabled skb exceeds max SGE limit\n");
+		if (skb_linearize(skb)) {
+			netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
+					 skb_shinfo(skb)->nr_frags,
+					 skb_is_gso(skb));
+			goto tx_drop_count;
+		}
+	}
+
 	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
 	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
 
@@ -402,8 +418,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		}
 	}
 
-	WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
-
 	if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
 		pkg.wqe_req.sgl = pkg.sgl_array;
 	} else {
@@ -438,9 +452,13 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 
 	if (err) {
 		(void)skb_dequeue_tail(&txq->pending_skbs);
+		mana_unmap_skb(skb, apc);
 		netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
-		err = NETDEV_TX_BUSY;
-		goto tx_busy;
+		if (err == -ENOSPC) {
+			err = NETDEV_TX_BUSY;
+			goto tx_busy;
+		}
+		goto free_sgl_ptr;
 	}
 
 	err = NETDEV_TX_OK;
@@ -478,6 +496,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	return NETDEV_TX_OK;
 }
 
+static netdev_features_t mana_features_check(struct sk_buff *skb,
+					     struct net_device *ndev,
+					     netdev_features_t features)
+{
+	if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
+	    skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+		/* Exceeds HW SGE limit.
+		 * GSO case:
+		 *   Disable GSO so the stack will software-segment the skb
+		 *   into smaller skbs that fit the SGE budget.
+		 * Non-GSO case:
+		 *   The xmit path will attempt skb_linearize() as a fallback.
+		 */
+		if (skb_is_gso(skb))
+			features &= ~NETIF_F_GSO_MASK;
+	}
+	return features;
+}
+
 static void mana_get_stats64(struct net_device *ndev,
 			     struct rtnl_link_stats64 *st)
 {
@@ -838,6 +875,7 @@ static const struct net_device_ops mana_devops = {
 	.ndo_open		= mana_open,
 	.ndo_stop		= mana_close,
 	.ndo_select_queue	= mana_select_queue,
+	.ndo_features_check	= mana_features_check,
 	.ndo_start_xmit		= mana_start_xmit,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_get_stats64	= mana_get_stats64,
@@ -1606,7 +1644,7 @@ static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
 	return 0;
 }
 
-static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
 {
 	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
 	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 57df78cfbf82..b35ecc58fbab 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -591,6 +591,9 @@ enum {
 /* Driver can self reset on FPGA Reconfig EQE notification */
 #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
 
+/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
+#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
+
 #define GDMA_DRV_CAP_FLAGS1 \
 	(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
 	 GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
@@ -599,7 +602,8 @@ enum {
 	 GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
 	 GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
 	 GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
-	 GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE)
+	 GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
+	 GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE)
 
 #define GDMA_DRV_CAP_FLAGS2 0
 
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 0921485565c0..330e1bb088bb 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -580,6 +580,7 @@ int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed,
 void mana_query_phy_stats(struct mana_port_context *apc);
 int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues);
 void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc);
 
 extern const struct ethtool_ops mana_ethtool_ops;
 extern struct dentry *mana_debugfs_root;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] net: mana: Handle SKB if TX SGEs exceed hardware limit
  2025-10-29 13:12 [PATCH net-next v2] net: mana: Handle SKB if TX SGEs exceed hardware limit Aditya Garg
@ 2025-10-30  9:04 ` Simon Horman
  2025-10-31 13:20   ` Aditya Garg
  2025-10-31 23:26 ` Jakub Kicinski
  1 sibling, 1 reply; 10+ messages in thread
From: Simon Horman @ 2025-10-30  9:04 UTC (permalink / raw)
  To: Aditya Garg
  Cc: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, edumazet,
	kuba, pabeni, longli, kotaranov, shradhagupta, ssengar, ernis,
	dipayanroy, shirazsaleem, linux-hyperv, netdev, linux-kernel,
	linux-rdma, gargaditya

On Wed, Oct 29, 2025 at 06:12:35AM -0700, Aditya Garg wrote:
> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
> per TX WQE. Exceeding this limit can cause TX failures.
> Add ndo_features_check() callback to validate SKB layout before
> transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
> NETIF_F_GSO_MASK to enforce software segmentation in the stack.
> Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
> exceed the SGE limit.
> 
> Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
> send other errors to free_sgl_ptr to free resources and record the tx
> drop.
> 
> Co-developed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>

...

> @@ -289,6 +290,21 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>  	cq = &apc->tx_qp[txq_idx].tx_cq;
>  	tx_stats = &txq->stats;
>  
> +	if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
> +	    skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
> +		/* GSO skb with Hardware SGE limit exceeded is not expected here
> +		 * as they are handled in mana_features_check() callback
> +		 */

Hi,

I'm curious to know if we actually need this code.
Are there cases where the mana_features_check() doesn't
handle things and the kernel will reach this line?

> +		if (skb_is_gso(skb))
> +			netdev_warn_once(ndev, "GSO enabled skb exceeds max SGE limit\n");
> +		if (skb_linearize(skb)) {
> +			netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
> +					 skb_shinfo(skb)->nr_frags,
> +					 skb_is_gso(skb));
> +			goto tx_drop_count;
> +		}
> +	}
> +
>  	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
>  	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
>  

...

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] net: mana: Handle SKB if TX SGEs exceed hardware limit
  2025-10-30  9:04 ` Simon Horman
@ 2025-10-31 13:20   ` Aditya Garg
  2025-11-03 14:58     ` Simon Horman
  0 siblings, 1 reply; 10+ messages in thread
From: Aditya Garg @ 2025-10-31 13:20 UTC (permalink / raw)
  To: Simon Horman
  Cc: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, edumazet,
	kuba, pabeni, longli, kotaranov, shradhagupta, ssengar, ernis,
	dipayanroy, shirazsaleem, linux-hyperv, netdev, linux-kernel,
	linux-rdma, gargaditya

On 30-10-2025 14:34, Simon Horman wrote:
> On Wed, Oct 29, 2025 at 06:12:35AM -0700, Aditya Garg wrote:
>> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
>> per TX WQE. Exceeding this limit can cause TX failures.
>> Add ndo_features_check() callback to validate SKB layout before
>> transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
>> NETIF_F_GSO_MASK to enforce software segmentation in the stack.
>> Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
>> exceed the SGE limit.
>>
>> Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
>> send other errors to free_sgl_ptr to free resources and record the tx
>> drop.
>>
>> Co-developed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>> Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
> 
> ...
> 
>> @@ -289,6 +290,21 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>>   	cq = &apc->tx_qp[txq_idx].tx_cq;
>>   	tx_stats = &txq->stats;
>>   
>> +	if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
>> +	    skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
>> +		/* GSO skb with Hardware SGE limit exceeded is not expected here
>> +		 * as they are handled in mana_features_check() callback
>> +		 */
> 
> Hi,
> 
> I'm curious to know if we actually need this code.
> Are there cases where the mana_features_check() doesn't
> handle things and the kernel will reach this line?
> 
>> +		if (skb_is_gso(skb))
>> +			netdev_warn_once(ndev, "GSO enabled skb exceeds max SGE limit\n");
>> +		if (skb_linearize(skb)) {
>> +			netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
>> +					 skb_shinfo(skb)->nr_frags,
>> +					 skb_is_gso(skb));
>> +			goto tx_drop_count;
>> +		}
>> +	}
>> +
>>   	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
>>   	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
>>   
> 
> ...

Hi Simon,
As it was previously discussed and agreed on with Eric, this is for 
Non-GSO skbs which could have possibly nr_frags greater than hardware limit.

Quoting Eric's comment from v1 thread: 
https://lore.kernel.org/netdev/CANn89iKwHWdUaeAsdSuZUXG-W8XwyM2oppQL9spKkex0p9-Azw@mail.gmail.com/
"I think that for non GSO, the linearization attempt is fine.

Note that this is extremely unlikely for non malicious users,
and MTU being usually small (9K or less),
the allocation will be much smaller than a GSO packet."

Regards,
Aditya

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] net: mana: Handle SKB if TX SGEs exceed hardware limit
  2025-10-29 13:12 [PATCH net-next v2] net: mana: Handle SKB if TX SGEs exceed hardware limit Aditya Garg
  2025-10-30  9:04 ` Simon Horman
@ 2025-10-31 23:26 ` Jakub Kicinski
  2025-11-05 16:40   ` Aditya Garg
  1 sibling, 1 reply; 10+ messages in thread
From: Jakub Kicinski @ 2025-10-31 23:26 UTC (permalink / raw)
  To: Aditya Garg
  Cc: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, edumazet,
	pabeni, longli, kotaranov, horms, shradhagupta, ssengar, ernis,
	dipayanroy, shirazsaleem, linux-hyperv, netdev, linux-kernel,
	linux-rdma, gargaditya

On Wed, 29 Oct 2025 06:12:35 -0700 Aditya Garg wrote:
> @@ -289,6 +290,21 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>  	cq = &apc->tx_qp[txq_idx].tx_cq;
>  	tx_stats = &txq->stats;
>  
> +	if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
> +	    skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
> +		/* GSO skb with Hardware SGE limit exceeded is not expected here
> +		 * as they are handled in mana_features_check() callback
> +		 */
> +		if (skb_is_gso(skb))
> +			netdev_warn_once(ndev, "GSO enabled skb exceeds max SGE limit\n");

This could be the same question Simon asked but why do you think you
need this line? Sure you need to linearize non-GSO but why do you care
to warn specifically about GSO?! Looks like defensive programming or
testing leftover..

> +		if (skb_linearize(skb)) {
> +			netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
> +					 skb_shinfo(skb)->nr_frags,
> +					 skb_is_gso(skb));

.. in practice including is_gso() here as you do is probably enough for
debug

> +			goto tx_drop_count;
> +		}
> +	}
> +
>  	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
>  	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
>  
> @@ -402,8 +418,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>  		}
>  	}
>  
> -	WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
> -
>  	if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
>  		pkg.wqe_req.sgl = pkg.sgl_array;
>  	} else {
> @@ -438,9 +452,13 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>  
>  	if (err) {
>  		(void)skb_dequeue_tail(&txq->pending_skbs);
> +		mana_unmap_skb(skb, apc);
>  		netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);

You have a print right here and in the callee. This condition must
(almost) never happen in practice. It's likely fine to just drop
the packet.

Either way -- this should be a separate patch.

> -		err = NETDEV_TX_BUSY;
> -		goto tx_busy;
> +		if (err == -ENOSPC) {
> +			err = NETDEV_TX_BUSY;
> +			goto tx_busy;
> +		}
> +		goto free_sgl_ptr;
>  	}
>  
>  	err = NETDEV_TX_OK;
> @@ -478,6 +496,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>  	return NETDEV_TX_OK;
>  }
-- 
pw-bot: cr

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] net: mana: Handle SKB if TX SGEs exceed hardware limit
  2025-10-31 13:20   ` Aditya Garg
@ 2025-11-03 14:58     ` Simon Horman
  0 siblings, 0 replies; 10+ messages in thread
From: Simon Horman @ 2025-11-03 14:58 UTC (permalink / raw)
  To: Aditya Garg
  Cc: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, edumazet,
	kuba, pabeni, longli, kotaranov, shradhagupta, ssengar, ernis,
	dipayanroy, shirazsaleem, linux-hyperv, netdev, linux-kernel,
	linux-rdma, gargaditya

On Fri, Oct 31, 2025 at 06:50:10PM +0530, Aditya Garg wrote:
> On 30-10-2025 14:34, Simon Horman wrote:
> > On Wed, Oct 29, 2025 at 06:12:35AM -0700, Aditya Garg wrote:
> > > The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
> > > per TX WQE. Exceeding this limit can cause TX failures.
> > > Add ndo_features_check() callback to validate SKB layout before
> > > transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
> > > NETIF_F_GSO_MASK to enforce software segmentation in the stack.
> > > Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
> > > exceed the SGE limit.
> > > 
> > > Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
> > > send other errors to free_sgl_ptr to free resources and record the tx
> > > drop.
> > > 
> > > Co-developed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> > > Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> > > Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
> > 
> > ...
> > 
> > > @@ -289,6 +290,21 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
> > >   	cq = &apc->tx_qp[txq_idx].tx_cq;
> > >   	tx_stats = &txq->stats;
> > > +	if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
> > > +	    skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
> > > +		/* GSO skb with Hardware SGE limit exceeded is not expected here
> > > +		 * as they are handled in mana_features_check() callback
> > > +		 */
> > 
> > Hi,
> > 
> > I'm curious to know if we actually need this code.
> > Are there cases where the mana_features_check() doesn't
> > handle things and the kernel will reach this line?
> > 
> > > +		if (skb_is_gso(skb))
> > > +			netdev_warn_once(ndev, "GSO enabled skb exceeds max SGE limit\n");
> > > +		if (skb_linearize(skb)) {
> > > +			netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
> > > +					 skb_shinfo(skb)->nr_frags,
> > > +					 skb_is_gso(skb));
> > > +			goto tx_drop_count;
> > > +		}
> > > +	}
> > > +
> > >   	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
> > >   	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
> > 
> > ...
> 
> Hi Simon,
> As it was previously discussed and agreed on with Eric, this is for Non-GSO
> skbs which could have possibly nr_frags greater than hardware limit.
> 
> Quoting Eric's comment from v1 thread: https://lore.kernel.org/netdev/CANn89iKwHWdUaeAsdSuZUXG-W8XwyM2oppQL9spKkex0p9-Azw@mail.gmail.com/
> "I think that for non GSO, the linearization attempt is fine.
> 
> Note that this is extremely unlikely for non malicious users,
> and MTU being usually small (9K or less),
> the allocation will be much smaller than a GSO packet."

Thanks for the clarification, that makes sense to me.

FTR, Jakub's question (elsewhere) is different to mine.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] net: mana: Handle SKB if TX SGEs exceed hardware limit
  2025-10-31 23:26 ` Jakub Kicinski
@ 2025-11-05 16:40   ` Aditya Garg
  2025-11-06  0:17     ` Jakub Kicinski
  0 siblings, 1 reply; 10+ messages in thread
From: Aditya Garg @ 2025-11-05 16:40 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, edumazet,
	pabeni, longli, kotaranov, horms, shradhagupta, ssengar, ernis,
	dipayanroy, shirazsaleem, linux-hyperv, netdev, linux-kernel,
	linux-rdma, gargaditya

On 01-11-2025 04:56, Jakub Kicinski wrote:
> On Wed, 29 Oct 2025 06:12:35 -0700 Aditya Garg wrote:
>> @@ -289,6 +290,21 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>>   	cq = &apc->tx_qp[txq_idx].tx_cq;
>>   	tx_stats = &txq->stats;
>>   
>> +	if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
>> +	    skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
>> +		/* GSO skb with Hardware SGE limit exceeded is not expected here
>> +		 * as they are handled in mana_features_check() callback
>> +		 */
>> +		if (skb_is_gso(skb))
>> +			netdev_warn_once(ndev, "GSO enabled skb exceeds max SGE limit\n");
> 
> This could be the same question Simon asked but why do you think you
> need this line? Sure you need to linearize non-GSO but why do you care
> to warn specifically about GSO?! Looks like defensive programming or
> testing leftover..
> 
Hi Jakub,
Agreed, The GSO specific warning is redundant. I'll drop it in next 
revision.
>> +		if (skb_linearize(skb)) {
>> +			netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
>> +					 skb_shinfo(skb)->nr_frags,
>> +					 skb_is_gso(skb));
> 
> .. in practice including is_gso() here as you do is probably enough for
> debug
> 
Ok
>> +			goto tx_drop_count;
>> +		}
>> +	}
>> +
>>   	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
>>   	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
>>   
>> @@ -402,8 +418,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>>   		}
>>   	}
>>   
>> -	WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
>> -
>>   	if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
>>   		pkg.wqe_req.sgl = pkg.sgl_array;
>>   	} else {
>> @@ -438,9 +452,13 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>>   
>>   	if (err) {
>>   		(void)skb_dequeue_tail(&txq->pending_skbs);
>> +		mana_unmap_skb(skb, apc);
>>   		netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
> 
> You have a print right here and in the callee. This condition must
> (almost) never happen in practice. It's likely fine to just drop
> the packet.
> The logs placed in callee doesn't covers all the failure scenarios, 
hence I feel to have this log here with proper status. Maybe I can 
remove the log in the callee?

> Either way -- this should be a separate patch.
> 
Are you suggesting a separate patch altogether or two patch in the same 
series?

Based on your suggestion i can work on v3.
Regards,
Aditya

>> -		err = NETDEV_TX_BUSY;
>> -		goto tx_busy;
>> +		if (err == -ENOSPC) {
>> +			err = NETDEV_TX_BUSY;
>> +			goto tx_busy;
>> +		}
>> +		goto free_sgl_ptr;
>>   	}
>>   
>>   	err = NETDEV_TX_OK;
>> @@ -478,6 +496,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>>   	return NETDEV_TX_OK;
>>   }


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] net: mana: Handle SKB if TX SGEs exceed hardware limit
  2025-11-05 16:40   ` Aditya Garg
@ 2025-11-06  0:17     ` Jakub Kicinski
  2025-11-06 13:00       ` Aditya Garg
  0 siblings, 1 reply; 10+ messages in thread
From: Jakub Kicinski @ 2025-11-06  0:17 UTC (permalink / raw)
  To: Aditya Garg
  Cc: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, edumazet,
	pabeni, longli, kotaranov, horms, shradhagupta, ssengar, ernis,
	dipayanroy, shirazsaleem, linux-hyperv, netdev, linux-kernel,
	linux-rdma, gargaditya

On Wed, 5 Nov 2025 22:10:23 +0530 Aditya Garg wrote:
> >>   	if (err) {
> >>   		(void)skb_dequeue_tail(&txq->pending_skbs);
> >> +		mana_unmap_skb(skb, apc);
> >>   		netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);  
> > 
> > You have a print right here and in the callee. This condition must
> > (almost) never happen in practice. It's likely fine to just drop
> > the packet.
>
> The logs placed in callee doesn't covers all the failure scenarios,   
> hence I feel to have this log here with proper status. Maybe I can 
> remove the log in the callee?

I think my point was that since there are logs (per packet!) when the
condition is hit -- if it did in fact hit with any noticeable frequency
your users would have complained. So handling the condition gracefully
and returning BUSY is likely just unnecessary complexity in practice.

The logs themselves I don't care all that much about. Sure, having two
lines for one error is a bit unclean.
 
> > Either way -- this should be a separate patch.
> >   
> Are you suggesting a separate patch altogether or two patch in the same 
> series?

The changes feel related enough to make them a series, but either way
is fine.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] net: mana: Handle SKB if TX SGEs exceed hardware limit
  2025-11-06  0:17     ` Jakub Kicinski
@ 2025-11-06 13:00       ` Aditya Garg
  2025-11-06 13:17         ` Eric Dumazet
  0 siblings, 1 reply; 10+ messages in thread
From: Aditya Garg @ 2025-11-06 13:00 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: kys, haiyangz, wei.liu, decui, andrew+netdev, davem, edumazet,
	pabeni, longli, kotaranov, horms, shradhagupta, ssengar, ernis,
	dipayanroy, shirazsaleem, linux-hyperv, netdev, linux-kernel,
	linux-rdma, gargaditya

On 06-11-2025 05:47, Jakub Kicinski wrote:
> On Wed, 5 Nov 2025 22:10:23 +0530 Aditya Garg wrote:
>>>>    	if (err) {
>>>>    		(void)skb_dequeue_tail(&txq->pending_skbs);
>>>> +		mana_unmap_skb(skb, apc);
>>>>    		netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
>>>
>>> You have a print right here and in the callee. This condition must
>>> (almost) never happen in practice. It's likely fine to just drop
>>> the packet.
>>
>> The logs placed in callee doesn't covers all the failure scenarios,
>> hence I feel to have this log here with proper status. Maybe I can
>> remove the log in the callee?
> 
> I think my point was that since there are logs (per packet!) when the
> condition is hit -- if it did in fact hit with any noticeable frequency
> your users would have complained. So handling the condition gracefully
> and returning BUSY is likely just unnecessary complexity in practice.
> 

In this, we are returning tx_busy when the error reason is -ENOSPC, for 
all other errors, skb is dropped.
Is it okay requeue only for -ENOSPC cases or should we drop the skb?

> The logs themselves I don't care all that much about. Sure, having two
> lines for one error is a bit unclean.
>   
>>> Either way -- this should be a separate patch.
>>>    
>> Are you suggesting a separate patch altogether or two patch in the same
>> series?
> 
> The changes feel related enough to make them a series, but either way
> is fine.

Regards,
Aditya


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] net: mana: Handle SKB if TX SGEs exceed hardware limit
  2025-11-06 13:00       ` Aditya Garg
@ 2025-11-06 13:17         ` Eric Dumazet
  2025-11-10 12:08           ` Aditya Garg
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2025-11-06 13:17 UTC (permalink / raw)
  To: Aditya Garg
  Cc: Jakub Kicinski, kys, haiyangz, wei.liu, decui, andrew+netdev,
	davem, pabeni, longli, kotaranov, horms, shradhagupta, ssengar,
	ernis, dipayanroy, shirazsaleem, linux-hyperv, netdev,
	linux-kernel, linux-rdma, gargaditya

On Thu, Nov 6, 2025 at 5:01 AM Aditya Garg
<gargaditya@linux.microsoft.com> wrote:
>
> On 06-11-2025 05:47, Jakub Kicinski wrote:
> > On Wed, 5 Nov 2025 22:10:23 +0530 Aditya Garg wrote:
> >>>>            if (err) {
> >>>>                    (void)skb_dequeue_tail(&txq->pending_skbs);
> >>>> +          mana_unmap_skb(skb, apc);
> >>>>                    netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
> >>>
> >>> You have a print right here and in the callee. This condition must
> >>> (almost) never happen in practice. It's likely fine to just drop
> >>> the packet.
> >>
> >> The logs placed in callee doesn't covers all the failure scenarios,
> >> hence I feel to have this log here with proper status. Maybe I can
> >> remove the log in the callee?
> >
> > I think my point was that since there are logs (per packet!) when the
> > condition is hit -- if it did in fact hit with any noticeable frequency
> > your users would have complained. So handling the condition gracefully
> > and returning BUSY is likely just unnecessary complexity in practice.
> >
>
> In this, we are returning tx_busy when the error reason is -ENOSPC, for
> all other errors, skb is dropped.
> Is it okay requeue only for -ENOSPC cases or should we drop the skb?

I would avoid NETDEV_TX_BUSY like the plague.
Most drivers get it wrong (including mana)
Documentation/networking/driver.rst

Please drop the packet.

>
> > The logs themselves I don't care all that much about. Sure, having two
> > lines for one error is a bit unclean.
> >
> >>> Either way -- this should be a separate patch.
> >>>
> >> Are you suggesting a separate patch altogether or two patch in the same
> >> series?
> >
> > The changes feel related enough to make them a series, but either way
> > is fine.
>
> Regards,
> Aditya
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] net: mana: Handle SKB if TX SGEs exceed hardware limit
  2025-11-06 13:17         ` Eric Dumazet
@ 2025-11-10 12:08           ` Aditya Garg
  0 siblings, 0 replies; 10+ messages in thread
From: Aditya Garg @ 2025-11-10 12:08 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Jakub Kicinski, kys, haiyangz, wei.liu, decui, andrew+netdev,
	davem, pabeni, longli, kotaranov, horms, shradhagupta, ssengar,
	ernis, dipayanroy, shirazsaleem, linux-hyperv, netdev,
	linux-kernel, linux-rdma, gargaditya

On 06-11-2025 18:47, Eric Dumazet wrote:
> On Thu, Nov 6, 2025 at 5:01 AM Aditya Garg
> <gargaditya@linux.microsoft.com> wrote:
>>
>> On 06-11-2025 05:47, Jakub Kicinski wrote:
>>> On Wed, 5 Nov 2025 22:10:23 +0530 Aditya Garg wrote:
>>>>>>             if (err) {
>>>>>>                     (void)skb_dequeue_tail(&txq->pending_skbs);
>>>>>> +          mana_unmap_skb(skb, apc);
>>>>>>                     netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
>>>>>
>>>>> You have a print right here and in the callee. This condition must
>>>>> (almost) never happen in practice. It's likely fine to just drop
>>>>> the packet.
>>>>
>>>> The logs placed in callee doesn't covers all the failure scenarios,
>>>> hence I feel to have this log here with proper status. Maybe I can
>>>> remove the log in the callee?
>>>
>>> I think my point was that since there are logs (per packet!) when the
>>> condition is hit -- if it did in fact hit with any noticeable frequency
>>> your users would have complained. So handling the condition gracefully
>>> and returning BUSY is likely just unnecessary complexity in practice.
>>>
>>
>> In this, we are returning tx_busy when the error reason is -ENOSPC, for
>> all other errors, skb is dropped.
>> Is it okay requeue only for -ENOSPC cases or should we drop the skb?
> 
> I would avoid NETDEV_TX_BUSY like the plague.
> Most drivers get it wrong (including mana)
> Documentation/networking/driver.rst
> 
> Please drop the packet.
> 

Hi Eric,
Okay, Will send v3 with changes discussed.

Thanks,
Aditya

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2025-11-10 12:08 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-29 13:12 [PATCH net-next v2] net: mana: Handle SKB if TX SGEs exceed hardware limit Aditya Garg
2025-10-30  9:04 ` Simon Horman
2025-10-31 13:20   ` Aditya Garg
2025-11-03 14:58     ` Simon Horman
2025-10-31 23:26 ` Jakub Kicinski
2025-11-05 16:40   ` Aditya Garg
2025-11-06  0:17     ` Jakub Kicinski
2025-11-06 13:00       ` Aditya Garg
2025-11-06 13:17         ` Eric Dumazet
2025-11-10 12:08           ` Aditya Garg

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).