Re: [PATCH net-next v2] net: airoha: Add TCP LRO support

Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed

From: Lorenzo Bianconi <lorenzo@kernel.org>
To: Andrew Lunn <andrew+netdev@lunn.ch>,
	"David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Cc: Alexander Lobakin <aleksander.lobakin@intel.com>,
	linux-arm-kernel@lists.infradead.org,
	linux-mediatek@lists.infradead.org, netdev@vger.kernel.org,
	Madhur Agrawal <madhur.agrawal@airoha.com>
Subject: Re: [PATCH net-next v2] net: airoha: Add TCP LRO support
Date: Fri, 12 Jun 2026 11:15:12 +0200	[thread overview]
Message-ID: <aivOIN_8ND5XFgE-@lore-desk> (raw)
In-Reply-To: <20260610-airoha-eth-lro-v2-1-54be99b9a2d5@kernel.org>

[-- Attachment #1: Type: text/plain, Size: 13242 bytes --]

> Add hardware TCP Large Receive Offload (LRO) support to the airoha_eth
> driver, leveraging the EN7581/AN7583 SoC's 8 dedicated LRO hardware queues
> mapped to RX queues 24–31. LRO hw offloading does not support
> Scatter-Gather (SG) so it is required to increase the page_pool allocation
> order to 2 for RX queues 24–31 (LRO queues).
> Since HW LRO is configured per-QDMA and shared across all devices using
> it, LRO is mutually exclusive with multiple active devices on the same
> QDMA block. Call netdev_update_features() on sibling devices in
> ndo_open/ndo_stop so that NETIF_F_LRO availability is re-evaluated when
> the QDMA user count changes.

commenting on sashiko's report:
https://sashiko.dev/#/patchset/20260610-airoha-eth-lro-v2-1-54be99b9a2d5%40kernel.org

> 

[...]

> +static int airoha_qdma_lro_rx_process(struct sk_buff *skb,
> +				      struct airoha_qdma_desc *desc)
> +{
> +	u32 desc_ctrl = le32_to_cpu(READ_ONCE(desc->ctrl));
> +	u32 len, th_off, tcp_ack_seq, agg_count, data_off;
> +	struct skb_shared_info *shinfo = skb_shinfo(skb);
> +	u32 msg1 = le32_to_cpu(READ_ONCE(desc->msg1));
> +	u32 msg2 = le32_to_cpu(READ_ONCE(desc->msg2));
> +	u32 msg3 = le32_to_cpu(READ_ONCE(desc->msg3));
> +	u16 tcp_win, l2_len;
> +	struct tcphdr *th;
> +	bool ipv4, ipv6;
> +
> +	agg_count = FIELD_GET(QDMA_ETH_RXMSG_AGG_COUNT_MASK, msg2);
> +	if (agg_count <= 1)
> +		return 0;
> +
> +	ipv4 = FIELD_GET(QDMA_ETH_RXMSG_IP4_MASK, msg1);
> +	ipv6 = FIELD_GET(QDMA_ETH_RXMSG_IP6_MASK, msg1);
> +	if (!ipv4 && !ipv6)
> +		return -EOPNOTSUPP;
> +
> +	l2_len = FIELD_GET(QDMA_ETH_RXMSG_L2_LEN_MASK, msg2);
> +	len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl);
> +	if (ipv4) {
> +		struct iphdr *iph, _iph;
> +
> +		iph = skb_header_pointer(skb, l2_len, sizeof(*iph), &_iph);
> +		if (!iph)
> +			return -EINVAL;
> +
> +		if (iph->protocol != IPPROTO_TCP)
> +			return -EOPNOTSUPP;
> +
> +		if (iph->ihl < 5)
> +			return -EINVAL;
> +
> +		th_off = l2_len + (iph->ihl << 2);
> +		if (!pskb_may_pull(skb, th_off))
> +			return -EINVAL;
> +
> +		iph = (struct iphdr *)(skb->data + l2_len);
> +		iph->tot_len = cpu_to_be16(len - l2_len);
> +		iph->check = 0;
> +		iph->check = ip_fast_csum((void *)iph, iph->ihl);
> +	} else {
> +		struct ipv6hdr *ip6h;
> +
> +		th_off = l2_len + sizeof(*ip6h);
> +		if (!pskb_may_pull(skb, th_off))
> +			return -EINVAL;
> +
> +		ip6h = (struct ipv6hdr *)(skb->data + l2_len);
> +		if (ip6h->nexthdr != NEXTHDR_TCP)
> +			return -EOPNOTSUPP;
> +
> +		ip6h->payload_len = cpu_to_be16(len - th_off);
> +	}
> +
> +	tcp_win = FIELD_GET(QDMA_ETH_RXMSG_TCP_WIN_MASK, msg3);
> +	tcp_ack_seq = le32_to_cpu(READ_ONCE(desc->data));
> +
> +	if (!pskb_may_pull(skb, th_off + sizeof(*th)))
> +		return -EINVAL;
> +
> +	th = (struct tcphdr *)(skb->data + th_off);

- Does this code validate that the TCP header length is at least 5?
  - ack, I will add it in the next revision.

> +	data_off = th_off + (th->doff << 2);
> +	if (len <= data_off)
> +		return -EINVAL;
> +
> +	th->ack_seq = cpu_to_be32(tcp_ack_seq);
> +	th->window = cpu_to_be16(tcp_win);
> +
> +	/* Check tcp timestamp option */
> +	if (th->doff == (sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4) {
> +		u32 topt;
> +
> +		if (!pskb_may_pull(skb, data_off))
> +			return -EINVAL;
> +
> +		th = (struct tcphdr *)(skb->data + th_off);
> +		topt = get_unaligned_be32(th + 1);
> +		if (topt == ((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
> +			     (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
> +			u8 *ptr = (u8 *)th + sizeof(*th) + 2 * sizeof(__be32);
> +			__le32 tcp_ts_reply = READ_ONCE(desc->tcp_ts_reply);
> +
> +			put_unaligned_be32(le32_to_cpu(tcp_ts_reply), ptr);
> +		}
> +	}
> +
> +	shinfo->gso_type = ipv4 ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
> +	shinfo->gso_size = (len - data_off) / agg_count;
> +	shinfo->gso_segs = agg_count;


- Could this gso_size calculation corrupt TCP sequence numbers during
  forwarding?
  - The TSO in the egress NIC will perform segmentation when the device is
    routing the LRO packet to the destination so the TCP sequence numbers
    will be recalculated. I agree gso_size is just an estimation of the real
    MSS on the first n-1 packets and we can have a suboptimal MSS size on
    egress segmented traffic, but the hw does not provide this info.

- Does this LRO processing need to initialize CHECKSUM_PARTIAL metadata?
  - ack, I will work on it

> +
> +	return 0;
> +}
> +
>  static int airoha_qdma_rx_process(struct airoha_queue *q, int budget)
>  {
>  	enum dma_data_direction dir = page_pool_get_dma_dir(q->page_pool);

[...]

>  
> +static void airoha_update_netdev_features(struct airoha_gdm_dev *dev)
> +{
> +	struct airoha_eth *eth = dev->eth;
> +	int i, j;
> +
> +	for (i = 0; i < ARRAY_SIZE(eth->ports); i++) {
> +		struct airoha_gdm_port *port = eth->ports[i];
> +
> +		if (!port)
> +			continue;
> +
> +		for (j = 0; j < ARRAY_SIZE(port->devs); j++) {
> +			struct airoha_gdm_dev *iter_dev = port->devs[j];
> +			struct net_device *netdev;
> +
> +			if (!iter_dev || iter_dev == dev)
> +				continue;
> +
> +			if (iter_dev->qdma != dev->qdma)
> +				continue;
> +
> +			netdev = netdev_from_priv(iter_dev);
> +			if (netdev->reg_state != NETREG_REGISTERED)
> +				continue;
> +
> +			netdev_update_features(netdev);
> +		}
> +	}
> +}
> +
>  static int airoha_dev_open(struct net_device *netdev)
>  {
>  	int err, len = ETH_HLEN + netdev->mtu + ETH_FCS_LEN;
> @@ -1778,6 +1970,18 @@ static int airoha_dev_open(struct net_device *netdev)
>  	struct airoha_gdm_port *port = dev->port;
>  	u32 cur_len, pse_port = FE_PSE_PORT_PPE1;
>  	struct airoha_qdma *qdma = dev->qdma;
> +	int qdma_id = qdma - &qdma->eth->qdma[0];
> +
> +	/* HW LRO is configured on the QDMA and it is shared between
> +	 * all the devices using it. Refuse to open a second device on
> +	 * the same QDMA if LRO is enabled on any device sharing it.
> +	 */
> +	if (atomic_read(&qdma->users) &&
> +	    airoha_fe_lro_is_enabled(qdma->eth, qdma_id)) {
> +		netdev_warn(netdev, "required to disable LRO on QDMA%d\n",
> +			    qdma_id);
> +		return -EBUSY;
> +	}

- Can a sibling interface that is DOWN bypass this LRO mutual exclusion check?
  - AFAIU the kernel does not run the ndo_set_features() when the device is
    down, just set the requested feature. When we bring up eth1, ndo_open()
    callback will return an error.

Regards,
Lorenzo

>  
>  	netif_tx_start_all_queues(netdev);
>  	err = airoha_set_vip_for_gdm_port(dev, true);
> @@ -1817,6 +2021,8 @@ static int airoha_dev_open(struct net_device *netdev)
>  	airoha_set_gdm_port_fwd_cfg(qdma->eth, REG_GDM_FWD_CFG(port->id),
>  				    pse_port);
>  
> +	airoha_update_netdev_features(dev);
> +
>  	return 0;
>  }
>  
> @@ -1876,6 +2082,8 @@ static int airoha_dev_stop(struct net_device *netdev)
>  		}
>  	}
>  
> +	airoha_update_netdev_features(dev);
> +
>  	return 0;
>  }
>  
> @@ -2154,6 +2362,56 @@ int airoha_get_fe_port(struct airoha_gdm_dev *dev)
>  	}
>  }
>  
> +static netdev_features_t airoha_dev_fix_features(struct net_device *netdev,
> +						 netdev_features_t features)
> +{
> +	struct airoha_gdm_dev *dev = netdev_priv(netdev);
> +	struct airoha_qdma *qdma = dev->qdma;
> +
> +	if (atomic_read(&qdma->users) > 1)
> +		features &= ~NETIF_F_LRO;
> +
> +	return features;
> +}
> +
> +static int airoha_dev_set_features(struct net_device *netdev,
> +				   netdev_features_t features)
> +{
> +	netdev_features_t diff = netdev->features ^ features;
> +	struct airoha_gdm_dev *dev = netdev_priv(netdev);
> +	struct airoha_qdma *qdma = dev->qdma;
> +	struct airoha_eth *eth = qdma->eth;
> +	int qdma_id = qdma - &eth->qdma[0];
> +
> +	if (!(diff & NETIF_F_LRO))
> +		return 0;
> +
> +	if (features & NETIF_F_LRO) {
> +		int i, lro_queue_index = 0;
> +
> +		for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) {
> +			struct airoha_queue *q = &qdma->q_rx[i];
> +			u32 size;
> +
> +			if (!q->ndesc)
> +				continue;
> +
> +			if (!airoha_qdma_is_lro_queue(q))
> +				continue;
> +
> +			size = SKB_WITH_OVERHEAD(AIROHA_RX_LEN(q->buf_size));
> +			size = min_t(u32, size, CDM_LRO_AGG_SIZE_MASK);
> +			airoha_fe_lro_init_rx_queue(eth, qdma_id,
> +						    lro_queue_index, i, size);
> +			lro_queue_index++;
> +		}
> +	} else {
> +		airoha_fe_lro_disable(eth, qdma_id);
> +	}
> +
> +	return 0;
> +}
> +
>  static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
>  				   struct net_device *netdev)
>  {
> @@ -3082,6 +3340,8 @@ static const struct net_device_ops airoha_netdev_ops = {
>  	.ndo_stop		= airoha_dev_stop,
>  	.ndo_change_mtu		= airoha_dev_change_mtu,
>  	.ndo_select_queue	= airoha_dev_select_queue,
> +	.ndo_fix_features	= airoha_dev_fix_features,
> +	.ndo_set_features	= airoha_dev_set_features,
>  	.ndo_start_xmit		= airoha_dev_xmit,
>  	.ndo_get_stats64        = airoha_dev_get_stats64,
>  	.ndo_set_mac_address	= airoha_dev_set_macaddr,
> @@ -3169,11 +3429,9 @@ static int airoha_alloc_gdm_device(struct airoha_eth *eth,
>  	netdev->ethtool_ops = &airoha_ethtool_ops;
>  	netdev->max_mtu = AIROHA_MAX_MTU;
>  	netdev->watchdog_timeo = 5 * HZ;
> -	netdev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO6 |
> -			      NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO |
> -			      NETIF_F_HW_TC;
> -	netdev->features |= netdev->hw_features;
> -	netdev->vlan_features = netdev->hw_features;
> +	netdev->hw_features = AIROHA_HW_FEATURES | NETIF_F_LRO;
> +	netdev->features |= AIROHA_HW_FEATURES;
> +	netdev->vlan_features = AIROHA_HW_FEATURES;
>  	SET_NETDEV_DEV(netdev, eth->dev);
>  
>  	/* reserve hw queues for HTB offloading */
> diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h
> index 8f42973f9cf5..e78ef751f244 100644
> --- a/drivers/net/ethernet/airoha/airoha_eth.h
> +++ b/drivers/net/ethernet/airoha/airoha_eth.h
> @@ -44,6 +44,18 @@
>  	 (_n) == 15 ? 128 :		\
>  	 (_n) ==  0 ? 1024 : 16)
>  
> +#define AIROHA_LRO_PAGE_ORDER		order_base_2(SZ_16K / PAGE_SIZE)
> +#define AIROHA_MAX_NUM_LRO_QUEUES	8
> +#define AIROHA_RXQ_LRO_EN_MASK		GENMASK(31, 24)
> +#define AIROHA_RXQ_LRO_MAX_AGG_COUNT	64
> +#define AIROHA_RXQ_LRO_MAX_AGG_TIME	100
> +#define AIROHA_RXQ_LRO_MAX_AGE_TIME	2000
> +
> +#define AIROHA_HW_FEATURES			\
> +	(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |	\
> +	 NETIF_F_TSO6 | NETIF_F_IPV6_CSUM |	\
> +	 NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_TC)
> +
>  #define PSE_RSV_PAGES			128
>  #define PSE_QUEUE_RSV_PAGES		64
>  
> @@ -672,6 +684,18 @@ static inline bool airoha_is_7583(struct airoha_eth *eth)
>  	return eth->soc->version == 0x7583;
>  }
>  
> +static inline bool airoha_qdma_is_lro_queue(struct airoha_queue *q)
> +{
> +	struct airoha_qdma *qdma = q->qdma;
> +	int qid = q - &qdma->q_rx[0];
> +
> +	/* EN7581 SoC supports at most 8 LRO rx queues */
> +	BUILD_BUG_ON(hweight32(AIROHA_RXQ_LRO_EN_MASK) >
> +		     AIROHA_MAX_NUM_LRO_QUEUES);
> +
> +	return !!(AIROHA_RXQ_LRO_EN_MASK & BIT(qid));
> +}
> +
>  int airoha_get_fe_port(struct airoha_gdm_dev *dev);
>  bool airoha_is_valid_gdm_dev(struct airoha_eth *eth,
>  			     struct airoha_gdm_dev *dev);
> diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h
> index 436f3c8779c1..dfc786583774 100644
> --- a/drivers/net/ethernet/airoha/airoha_regs.h
> +++ b/drivers/net/ethernet/airoha/airoha_regs.h
> @@ -122,6 +122,20 @@
>  #define CDM_CRSN_QSEL_REASON_MASK(_n)	\
>  	GENMASK(4 + (((_n) % 4) << 3),	(((_n) % 4) << 3))
>  
> +#define REG_CDM_LRO_RXQ(_n, _m)		(CDM_BASE(_n) + 0x78 + ((_m) & 0x4))
> +#define LRO_RXQ_MASK(_n)		GENMASK(4 + (((_n) & 0x3) << 3), ((_n) & 0x3) << 3)
> +
> +#define REG_CDM_LRO_EN(_n)		(CDM_BASE(_n) + 0x80)
> +#define LRO_RXQ_EN_MASK			GENMASK(7, 0)
> +
> +#define REG_CDM_LRO_LIMIT(_n)		(CDM_BASE(_n) + 0x84)
> +#define CDM_LRO_AGG_NUM_MASK		GENMASK(23, 16)
> +#define CDM_LRO_AGG_SIZE_MASK		GENMASK(15, 0)
> +
> +#define REG_CDM_LRO_AGE_TIME(_n)	(CDM_BASE(_n) + 0x88)
> +#define CDM_LRO_AGE_TIME_MASK		GENMASK(31, 16)
> +#define CDM_LRO_AGG_TIME_MASK		GENMASK(15, 0)
> +
>  #define REG_GDM_FWD_CFG(_n)		GDM_BASE(_n)
>  #define GDM_PAD_EN_MASK			BIT(28)
>  #define GDM_DROP_CRC_ERR_MASK		BIT(23)
> @@ -883,9 +897,15 @@
>  #define QDMA_ETH_RXMSG_SPORT_MASK	GENMASK(25, 21)
>  #define QDMA_ETH_RXMSG_CRSN_MASK	GENMASK(20, 16)
>  #define QDMA_ETH_RXMSG_PPE_ENTRY_MASK	GENMASK(15, 0)
> +/* RX MSG2 */
> +#define QDMA_ETH_RXMSG_AGG_COUNT_MASK	GENMASK(31, 24)
> +#define QDMA_ETH_RXMSG_L2_LEN_MASK	GENMASK(6, 0)
> +/* RX MSG3 */
> +#define QDMA_ETH_RXMSG_AGG_LEN_MASK	GENMASK(31, 16)
> +#define QDMA_ETH_RXMSG_TCP_WIN_MASK	GENMASK(15, 0)
>  
>  struct airoha_qdma_desc {
> -	__le32 rsv;
> +	__le32 tcp_ts_reply;
>  	__le32 ctrl;
>  	__le32 addr;
>  	__le32 data;
> 
> ---
> base-commit: 660a9e399ab02c0cb86d277ed6b0c9d10c350fdd
> change-id: 20260520-airoha-eth-lro-a5d1c3631811
> 
> Best regards,
> -- 
> Lorenzo Bianconi <lorenzo@kernel.org>
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

next prev parent reply	other threads:[~2026-06-12  9:15 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-10 15:00 [PATCH net-next v2] net: airoha: Add TCP LRO support Lorenzo Bianconi
2026-06-12  9:15 ` Lorenzo Bianconi [this message]
2026-06-12 10:05 ` Lorenzo Bianconi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aivOIN_8ND5XFgE-@lore-desk \
    --to=lorenzo@kernel.org \
    --cc=aleksander.lobakin@intel.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-mediatek@lists.infradead.org \
    --cc=madhur.agrawal@airoha.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox