Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next v2] net: airoha: Add TCP LRO support
@ 2026-06-10 15:00 Lorenzo Bianconi
  2026-06-12  9:15 ` Lorenzo Bianconi
  2026-06-12 10:05 ` Lorenzo Bianconi
  0 siblings, 2 replies; 3+ messages in thread
From: Lorenzo Bianconi @ 2026-06-10 15:00 UTC (permalink / raw)
  To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Lorenzo Bianconi
  Cc: Alexander Lobakin, linux-arm-kernel, linux-mediatek, netdev,
	Madhur Agrawal

Add hardware TCP Large Receive Offload (LRO) support to the airoha_eth
driver, leveraging the EN7581/AN7583 SoC's 8 dedicated LRO hardware queues
mapped to RX queues 24–31. LRO hw offloading does not support
Scatter-Gather (SG) so it is required to increase the page_pool allocation
order to 2 for RX queues 24–31 (LRO queues).
Since HW LRO is configured per-QDMA and shared across all devices using
it, LRO is mutually exclusive with multiple active devices on the same
QDMA block. Call netdev_update_features() on sibling devices in
ndo_open/ndo_stop so that NETIF_F_LRO availability is re-evaluated when
the QDMA user count changes.

Performance comparison between GRO and hw LRO has been carried out using
a 10Gbps NIC:
GRO: ~2.7 Gbps
LRO: ~8.1 Gbps

Tested-by: Madhur Agrawal <madhur.agrawal@airoha.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
Changes in v2:
- Rebase on top of net-next main branch.
- Link to v1: https://lore.kernel.org/r/20260606-airoha-eth-lro-v1-1-0ebceb0eafc3@kernel.org

Changes in v1:
- Please note this patch depends on the following patch not applied yet
  to net-next
  https://lore.kernel.org/netdev/20260606-airoha_qdma_users-no-atomic-v1-1-86e2d6a1bfaf@kernel.org/T/#u
- Restrict LRO to single user QDMA.
- Introduce some more sanity checks.
- Disable scatter-gather for LRO queues.
- Run netif_receive_skb() for LRO packets.
- Link to v3: https://lore.kernel.org/r/20260528-airoha-eth-lro-v3-1-dd09c1fb000e@kernel.org

Changes in RFC v3:
- Fix double-free of the page_pool of airoha_qdma_lro_rx_process()
  fails.
- Set AIROHA_LRO_PAGE_ORDER according to PAGE_SIZE.
- Add missig gso metadata for the LRO packet.
- Link to v2: https://lore.kernel.org/r/20260526-airoha-eth-lro-v2-1-24e2a9e7a397@kernel.org

Changes in RFC v2:
- Improve performances fixing buf_size computation.
- Fix possible overflow in REG_CDM_LRO_LIMIT() register configuration.
- Require the device to be not running before configuring LRO.
- Fix configuration order in airoha_fe_lro_is_enabled().
- Check skb header length in airoha_qdma_lro_rx_process().
- Do not check net_device feature in airoha_qdma_rx_process() before
  executing airoha_qdma_lro_rx_process() but rely on
  airoha_qdma_lro_rx_process() logic.
- Fix possible double recycle in airoha_qdma_rx_process() for LRO
  packets.
- Always use AIROHA_RXQ_LRO_MAX_AGG_COUNT macro for max LRO aggregated
  fragments in airoha_fe_lro_init_rx_queue().
- Link to v1: https://lore.kernel.org/r/20260520-airoha-eth-lro-v1-1-129cc33766e9@kernel.org
---
 drivers/net/ethernet/airoha/airoha_eth.c  | 282 ++++++++++++++++++++++++++++--
 drivers/net/ethernet/airoha/airoha_eth.h  |  24 +++
 drivers/net/ethernet/airoha/airoha_regs.h |  22 ++-
 3 files changed, 315 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index 5a8e84fa9918..851005b86314 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -12,6 +12,7 @@
 #include <net/dst_metadata.h>
 #include <net/page_pool/helpers.h>
 #include <net/pkt_cls.h>
+#include <net/tcp.h>
 #include <uapi/linux/ppp_defs.h>
 
 #include "airoha_regs.h"
@@ -486,6 +487,48 @@ static void airoha_fe_crsn_qsel_init(struct airoha_eth *eth)
 				 CDM_CRSN_QSEL_Q1));
 }
 
+static void airoha_fe_lro_init_rx_queue(struct airoha_eth *eth, int qdma_id,
+					int lro_queue_index, int qid,
+					int buf_size)
+{
+	int id = qdma_id + 1;
+
+	airoha_fe_rmw(eth, REG_CDM_LRO_LIMIT(id),
+		      CDM_LRO_AGG_NUM_MASK | CDM_LRO_AGG_SIZE_MASK,
+		      FIELD_PREP(CDM_LRO_AGG_SIZE_MASK, buf_size) |
+		      FIELD_PREP(CDM_LRO_AGG_NUM_MASK,
+				 AIROHA_RXQ_LRO_MAX_AGG_COUNT));
+	airoha_fe_rmw(eth, REG_CDM_LRO_AGE_TIME(id),
+		      CDM_LRO_AGE_TIME_MASK | CDM_LRO_AGG_TIME_MASK,
+		      FIELD_PREP(CDM_LRO_AGE_TIME_MASK,
+				 AIROHA_RXQ_LRO_MAX_AGE_TIME) |
+		      FIELD_PREP(CDM_LRO_AGG_TIME_MASK,
+				 AIROHA_RXQ_LRO_MAX_AGG_TIME));
+	airoha_fe_rmw(eth, REG_CDM_LRO_RXQ(id, lro_queue_index),
+		      LRO_RXQ_MASK(lro_queue_index),
+		      __field_prep(LRO_RXQ_MASK(lro_queue_index), qid));
+	airoha_fe_set(eth, REG_CDM_LRO_EN(id), BIT(lro_queue_index));
+}
+
+static void airoha_fe_lro_disable(struct airoha_eth *eth, int qdma_id)
+{
+	int i, id = qdma_id + 1;
+
+	airoha_fe_clear(eth, REG_CDM_LRO_EN(id), LRO_RXQ_EN_MASK);
+	airoha_fe_clear(eth, REG_CDM_LRO_LIMIT(id),
+			CDM_LRO_AGG_NUM_MASK | CDM_LRO_AGG_SIZE_MASK);
+	airoha_fe_clear(eth, REG_CDM_LRO_AGE_TIME(id),
+			CDM_LRO_AGE_TIME_MASK | CDM_LRO_AGG_TIME_MASK);
+	for (i = 0; i < AIROHA_MAX_NUM_LRO_QUEUES; i++)
+		airoha_fe_clear(eth, REG_CDM_LRO_RXQ(id, i), LRO_RXQ_MASK(i));
+}
+
+static bool airoha_fe_lro_is_enabled(struct airoha_eth *eth, int qdma_id)
+{
+	return airoha_fe_get(eth, REG_CDM_LRO_EN(qdma_id + 1),
+			     LRO_RXQ_EN_MASK);
+}
+
 static int airoha_fe_init(struct airoha_eth *eth)
 {
 	airoha_fe_maccr_init(eth);
@@ -603,6 +646,7 @@ static int airoha_qdma_fill_rx_queue(struct airoha_queue *q)
 		e->dma_addr = page_pool_get_dma_addr(page) + offset;
 		e->dma_len = SKB_WITH_OVERHEAD(AIROHA_RX_LEN(q->buf_size));
 
+		WRITE_ONCE(desc->tcp_ts_reply, 0);
 		val = FIELD_PREP(QDMA_DESC_LEN_MASK, e->dma_len);
 		WRITE_ONCE(desc->ctrl, cpu_to_le32(val));
 		WRITE_ONCE(desc->addr, cpu_to_le32(e->dma_addr));
@@ -644,6 +688,104 @@ airoha_qdma_get_gdm_dev(struct airoha_eth *eth, struct airoha_qdma_desc *desc)
 	return port->devs[d] ? port->devs[d] : ERR_PTR(-ENODEV);
 }
 
+static int airoha_qdma_lro_rx_process(struct sk_buff *skb,
+				      struct airoha_qdma_desc *desc)
+{
+	u32 desc_ctrl = le32_to_cpu(READ_ONCE(desc->ctrl));
+	u32 len, th_off, tcp_ack_seq, agg_count, data_off;
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+	u32 msg1 = le32_to_cpu(READ_ONCE(desc->msg1));
+	u32 msg2 = le32_to_cpu(READ_ONCE(desc->msg2));
+	u32 msg3 = le32_to_cpu(READ_ONCE(desc->msg3));
+	u16 tcp_win, l2_len;
+	struct tcphdr *th;
+	bool ipv4, ipv6;
+
+	agg_count = FIELD_GET(QDMA_ETH_RXMSG_AGG_COUNT_MASK, msg2);
+	if (agg_count <= 1)
+		return 0;
+
+	ipv4 = FIELD_GET(QDMA_ETH_RXMSG_IP4_MASK, msg1);
+	ipv6 = FIELD_GET(QDMA_ETH_RXMSG_IP6_MASK, msg1);
+	if (!ipv4 && !ipv6)
+		return -EOPNOTSUPP;
+
+	l2_len = FIELD_GET(QDMA_ETH_RXMSG_L2_LEN_MASK, msg2);
+	len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl);
+	if (ipv4) {
+		struct iphdr *iph, _iph;
+
+		iph = skb_header_pointer(skb, l2_len, sizeof(*iph), &_iph);
+		if (!iph)
+			return -EINVAL;
+
+		if (iph->protocol != IPPROTO_TCP)
+			return -EOPNOTSUPP;
+
+		if (iph->ihl < 5)
+			return -EINVAL;
+
+		th_off = l2_len + (iph->ihl << 2);
+		if (!pskb_may_pull(skb, th_off))
+			return -EINVAL;
+
+		iph = (struct iphdr *)(skb->data + l2_len);
+		iph->tot_len = cpu_to_be16(len - l2_len);
+		iph->check = 0;
+		iph->check = ip_fast_csum((void *)iph, iph->ihl);
+	} else {
+		struct ipv6hdr *ip6h;
+
+		th_off = l2_len + sizeof(*ip6h);
+		if (!pskb_may_pull(skb, th_off))
+			return -EINVAL;
+
+		ip6h = (struct ipv6hdr *)(skb->data + l2_len);
+		if (ip6h->nexthdr != NEXTHDR_TCP)
+			return -EOPNOTSUPP;
+
+		ip6h->payload_len = cpu_to_be16(len - th_off);
+	}
+
+	tcp_win = FIELD_GET(QDMA_ETH_RXMSG_TCP_WIN_MASK, msg3);
+	tcp_ack_seq = le32_to_cpu(READ_ONCE(desc->data));
+
+	if (!pskb_may_pull(skb, th_off + sizeof(*th)))
+		return -EINVAL;
+
+	th = (struct tcphdr *)(skb->data + th_off);
+	data_off = th_off + (th->doff << 2);
+	if (len <= data_off)
+		return -EINVAL;
+
+	th->ack_seq = cpu_to_be32(tcp_ack_seq);
+	th->window = cpu_to_be16(tcp_win);
+
+	/* Check tcp timestamp option */
+	if (th->doff == (sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4) {
+		u32 topt;
+
+		if (!pskb_may_pull(skb, data_off))
+			return -EINVAL;
+
+		th = (struct tcphdr *)(skb->data + th_off);
+		topt = get_unaligned_be32(th + 1);
+		if (topt == ((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+			     (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
+			u8 *ptr = (u8 *)th + sizeof(*th) + 2 * sizeof(__be32);
+			__le32 tcp_ts_reply = READ_ONCE(desc->tcp_ts_reply);
+
+			put_unaligned_be32(le32_to_cpu(tcp_ts_reply), ptr);
+		}
+	}
+
+	shinfo->gso_type = ipv4 ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
+	shinfo->gso_size = (len - data_off) / agg_count;
+	shinfo->gso_segs = agg_count;
+
+	return 0;
+}
+
 static int airoha_qdma_rx_process(struct airoha_queue *q, int budget)
 {
 	enum dma_data_direction dir = page_pool_get_dma_dir(q->page_pool);
@@ -694,9 +836,17 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget)
 			__skb_put(q->skb, len);
 			skb_mark_for_recycle(q->skb);
 			q->skb->dev = netdev;
-			q->skb->protocol = eth_type_trans(q->skb, netdev);
 			q->skb->ip_summed = CHECKSUM_UNNECESSARY;
 			skb_record_rx_queue(q->skb, qid);
+
+			if (airoha_qdma_lro_rx_process(q->skb, desc) < 0) {
+				netdev->stats.rx_dropped++;
+				dev_kfree_skb(q->skb);
+				q->skb = NULL;
+				continue;
+			}
+
+			q->skb->protocol = eth_type_trans(q->skb, netdev);
 		} else { /* scattered frame */
 			struct skb_shared_info *shinfo = skb_shinfo(q->skb);
 			int nr_frags = shinfo->nr_frags;
@@ -741,7 +891,10 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget)
 					     false);
 
 		done++;
-		napi_gro_receive(&q->napi, q->skb);
+		if (skb_is_gso(q->skb))
+			netif_receive_skb(q->skb);
+		else
+			napi_gro_receive(&q->napi, q->skb);
 		q->skb = NULL;
 		continue;
 free_frag:
@@ -787,12 +940,10 @@ static int airoha_qdma_rx_napi_poll(struct napi_struct *napi, int budget)
 static int airoha_qdma_init_rx_queue(struct airoha_queue *q,
 				     struct airoha_qdma *qdma, int ndesc)
 {
-	const struct page_pool_params pp_params = {
-		.order = 0,
+	struct page_pool_params pp_params = {
 		.pool_size = 256,
 		.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
 		.dma_dir = DMA_FROM_DEVICE,
-		.max_len = PAGE_SIZE,
 		.nid = NUMA_NO_NODE,
 		.dev = qdma->eth->dev,
 		.napi = &q->napi,
@@ -800,9 +951,10 @@ static int airoha_qdma_init_rx_queue(struct airoha_queue *q,
 	struct airoha_eth *eth = qdma->eth;
 	int qid = q - &qdma->q_rx[0], thr;
 	dma_addr_t dma_addr;
+	bool lro_q;
 
-	q->buf_size = PAGE_SIZE / 2;
 	q->qdma = qdma;
+	lro_q = airoha_qdma_is_lro_queue(q);
 
 	q->entry = devm_kzalloc(eth->dev, ndesc * sizeof(*q->entry),
 				GFP_KERNEL);
@@ -814,6 +966,9 @@ static int airoha_qdma_init_rx_queue(struct airoha_queue *q,
 	if (!q->desc)
 		return -ENOMEM;
 
+	pp_params.order = lro_q ? AIROHA_LRO_PAGE_ORDER : 0;
+	pp_params.max_len = PAGE_SIZE << pp_params.order;
+
 	q->page_pool = page_pool_create(&pp_params);
 	if (IS_ERR(q->page_pool)) {
 		int err = PTR_ERR(q->page_pool);
@@ -822,6 +977,7 @@ static int airoha_qdma_init_rx_queue(struct airoha_queue *q,
 		return err;
 	}
 
+	q->buf_size = lro_q ? pp_params.max_len : pp_params.max_len / 2;
 	q->ndesc = ndesc;
 	netif_napi_add(eth->napi_dev, &q->napi, airoha_qdma_rx_napi_poll);
 
@@ -835,7 +991,12 @@ static int airoha_qdma_init_rx_queue(struct airoha_queue *q,
 			FIELD_PREP(RX_RING_THR_MASK, thr));
 	airoha_qdma_rmw(qdma, REG_RX_DMA_IDX(qid), RX_RING_DMA_IDX_MASK,
 			FIELD_PREP(RX_RING_DMA_IDX_MASK, q->head));
-	airoha_qdma_set(qdma, REG_RX_SCATTER_CFG(qid), RX_RING_SG_EN_MASK);
+	if (lro_q)
+		airoha_qdma_clear(qdma, REG_RX_SCATTER_CFG(qid),
+				  RX_RING_SG_EN_MASK);
+	else
+		airoha_qdma_set(qdma, REG_RX_SCATTER_CFG(qid),
+				RX_RING_SG_EN_MASK);
 
 	airoha_qdma_fill_rx_queue(q);
 
@@ -857,6 +1018,7 @@ static void airoha_qdma_cleanup_rx_queue(struct airoha_queue *q)
 					page_pool_get_dma_dir(q->page_pool));
 		page_pool_put_full_page(q->page_pool, page, false);
 		/* Reset DMA descriptor */
+		WRITE_ONCE(desc->tcp_ts_reply, 0);
 		WRITE_ONCE(desc->ctrl, 0);
 		WRITE_ONCE(desc->addr, 0);
 		WRITE_ONCE(desc->data, 0);
@@ -1771,6 +1933,36 @@ static void airoha_update_hw_stats(struct airoha_gdm_dev *dev)
 	spin_unlock(&port->stats.lock);
 }
 
+static void airoha_update_netdev_features(struct airoha_gdm_dev *dev)
+{
+	struct airoha_eth *eth = dev->eth;
+	int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(eth->ports); i++) {
+		struct airoha_gdm_port *port = eth->ports[i];
+
+		if (!port)
+			continue;
+
+		for (j = 0; j < ARRAY_SIZE(port->devs); j++) {
+			struct airoha_gdm_dev *iter_dev = port->devs[j];
+			struct net_device *netdev;
+
+			if (!iter_dev || iter_dev == dev)
+				continue;
+
+			if (iter_dev->qdma != dev->qdma)
+				continue;
+
+			netdev = netdev_from_priv(iter_dev);
+			if (netdev->reg_state != NETREG_REGISTERED)
+				continue;
+
+			netdev_update_features(netdev);
+		}
+	}
+}
+
 static int airoha_dev_open(struct net_device *netdev)
 {
 	int err, len = ETH_HLEN + netdev->mtu + ETH_FCS_LEN;
@@ -1778,6 +1970,18 @@ static int airoha_dev_open(struct net_device *netdev)
 	struct airoha_gdm_port *port = dev->port;
 	u32 cur_len, pse_port = FE_PSE_PORT_PPE1;
 	struct airoha_qdma *qdma = dev->qdma;
+	int qdma_id = qdma - &qdma->eth->qdma[0];
+
+	/* HW LRO is configured on the QDMA and it is shared between
+	 * all the devices using it. Refuse to open a second device on
+	 * the same QDMA if LRO is enabled on any device sharing it.
+	 */
+	if (atomic_read(&qdma->users) &&
+	    airoha_fe_lro_is_enabled(qdma->eth, qdma_id)) {
+		netdev_warn(netdev, "required to disable LRO on QDMA%d\n",
+			    qdma_id);
+		return -EBUSY;
+	}
 
 	netif_tx_start_all_queues(netdev);
 	err = airoha_set_vip_for_gdm_port(dev, true);
@@ -1817,6 +2021,8 @@ static int airoha_dev_open(struct net_device *netdev)
 	airoha_set_gdm_port_fwd_cfg(qdma->eth, REG_GDM_FWD_CFG(port->id),
 				    pse_port);
 
+	airoha_update_netdev_features(dev);
+
 	return 0;
 }
 
@@ -1876,6 +2082,8 @@ static int airoha_dev_stop(struct net_device *netdev)
 		}
 	}
 
+	airoha_update_netdev_features(dev);
+
 	return 0;
 }
 
@@ -2154,6 +2362,56 @@ int airoha_get_fe_port(struct airoha_gdm_dev *dev)
 	}
 }
 
+static netdev_features_t airoha_dev_fix_features(struct net_device *netdev,
+						 netdev_features_t features)
+{
+	struct airoha_gdm_dev *dev = netdev_priv(netdev);
+	struct airoha_qdma *qdma = dev->qdma;
+
+	if (atomic_read(&qdma->users) > 1)
+		features &= ~NETIF_F_LRO;
+
+	return features;
+}
+
+static int airoha_dev_set_features(struct net_device *netdev,
+				   netdev_features_t features)
+{
+	netdev_features_t diff = netdev->features ^ features;
+	struct airoha_gdm_dev *dev = netdev_priv(netdev);
+	struct airoha_qdma *qdma = dev->qdma;
+	struct airoha_eth *eth = qdma->eth;
+	int qdma_id = qdma - &eth->qdma[0];
+
+	if (!(diff & NETIF_F_LRO))
+		return 0;
+
+	if (features & NETIF_F_LRO) {
+		int i, lro_queue_index = 0;
+
+		for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) {
+			struct airoha_queue *q = &qdma->q_rx[i];
+			u32 size;
+
+			if (!q->ndesc)
+				continue;
+
+			if (!airoha_qdma_is_lro_queue(q))
+				continue;
+
+			size = SKB_WITH_OVERHEAD(AIROHA_RX_LEN(q->buf_size));
+			size = min_t(u32, size, CDM_LRO_AGG_SIZE_MASK);
+			airoha_fe_lro_init_rx_queue(eth, qdma_id,
+						    lro_queue_index, i, size);
+			lro_queue_index++;
+		}
+	} else {
+		airoha_fe_lro_disable(eth, qdma_id);
+	}
+
+	return 0;
+}
+
 static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
 				   struct net_device *netdev)
 {
@@ -3082,6 +3340,8 @@ static const struct net_device_ops airoha_netdev_ops = {
 	.ndo_stop		= airoha_dev_stop,
 	.ndo_change_mtu		= airoha_dev_change_mtu,
 	.ndo_select_queue	= airoha_dev_select_queue,
+	.ndo_fix_features	= airoha_dev_fix_features,
+	.ndo_set_features	= airoha_dev_set_features,
 	.ndo_start_xmit		= airoha_dev_xmit,
 	.ndo_get_stats64        = airoha_dev_get_stats64,
 	.ndo_set_mac_address	= airoha_dev_set_macaddr,
@@ -3169,11 +3429,9 @@ static int airoha_alloc_gdm_device(struct airoha_eth *eth,
 	netdev->ethtool_ops = &airoha_ethtool_ops;
 	netdev->max_mtu = AIROHA_MAX_MTU;
 	netdev->watchdog_timeo = 5 * HZ;
-	netdev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO6 |
-			      NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO |
-			      NETIF_F_HW_TC;
-	netdev->features |= netdev->hw_features;
-	netdev->vlan_features = netdev->hw_features;
+	netdev->hw_features = AIROHA_HW_FEATURES | NETIF_F_LRO;
+	netdev->features |= AIROHA_HW_FEATURES;
+	netdev->vlan_features = AIROHA_HW_FEATURES;
 	SET_NETDEV_DEV(netdev, eth->dev);
 
 	/* reserve hw queues for HTB offloading */
diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h
index 8f42973f9cf5..e78ef751f244 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.h
+++ b/drivers/net/ethernet/airoha/airoha_eth.h
@@ -44,6 +44,18 @@
 	 (_n) == 15 ? 128 :		\
 	 (_n) ==  0 ? 1024 : 16)
 
+#define AIROHA_LRO_PAGE_ORDER		order_base_2(SZ_16K / PAGE_SIZE)
+#define AIROHA_MAX_NUM_LRO_QUEUES	8
+#define AIROHA_RXQ_LRO_EN_MASK		GENMASK(31, 24)
+#define AIROHA_RXQ_LRO_MAX_AGG_COUNT	64
+#define AIROHA_RXQ_LRO_MAX_AGG_TIME	100
+#define AIROHA_RXQ_LRO_MAX_AGE_TIME	2000
+
+#define AIROHA_HW_FEATURES			\
+	(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |	\
+	 NETIF_F_TSO6 | NETIF_F_IPV6_CSUM |	\
+	 NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_TC)
+
 #define PSE_RSV_PAGES			128
 #define PSE_QUEUE_RSV_PAGES		64
 
@@ -672,6 +684,18 @@ static inline bool airoha_is_7583(struct airoha_eth *eth)
 	return eth->soc->version == 0x7583;
 }
 
+static inline bool airoha_qdma_is_lro_queue(struct airoha_queue *q)
+{
+	struct airoha_qdma *qdma = q->qdma;
+	int qid = q - &qdma->q_rx[0];
+
+	/* EN7581 SoC supports at most 8 LRO rx queues */
+	BUILD_BUG_ON(hweight32(AIROHA_RXQ_LRO_EN_MASK) >
+		     AIROHA_MAX_NUM_LRO_QUEUES);
+
+	return !!(AIROHA_RXQ_LRO_EN_MASK & BIT(qid));
+}
+
 int airoha_get_fe_port(struct airoha_gdm_dev *dev);
 bool airoha_is_valid_gdm_dev(struct airoha_eth *eth,
 			     struct airoha_gdm_dev *dev);
diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h
index 436f3c8779c1..dfc786583774 100644
--- a/drivers/net/ethernet/airoha/airoha_regs.h
+++ b/drivers/net/ethernet/airoha/airoha_regs.h
@@ -122,6 +122,20 @@
 #define CDM_CRSN_QSEL_REASON_MASK(_n)	\
 	GENMASK(4 + (((_n) % 4) << 3),	(((_n) % 4) << 3))
 
+#define REG_CDM_LRO_RXQ(_n, _m)		(CDM_BASE(_n) + 0x78 + ((_m) & 0x4))
+#define LRO_RXQ_MASK(_n)		GENMASK(4 + (((_n) & 0x3) << 3), ((_n) & 0x3) << 3)
+
+#define REG_CDM_LRO_EN(_n)		(CDM_BASE(_n) + 0x80)
+#define LRO_RXQ_EN_MASK			GENMASK(7, 0)
+
+#define REG_CDM_LRO_LIMIT(_n)		(CDM_BASE(_n) + 0x84)
+#define CDM_LRO_AGG_NUM_MASK		GENMASK(23, 16)
+#define CDM_LRO_AGG_SIZE_MASK		GENMASK(15, 0)
+
+#define REG_CDM_LRO_AGE_TIME(_n)	(CDM_BASE(_n) + 0x88)
+#define CDM_LRO_AGE_TIME_MASK		GENMASK(31, 16)
+#define CDM_LRO_AGG_TIME_MASK		GENMASK(15, 0)
+
 #define REG_GDM_FWD_CFG(_n)		GDM_BASE(_n)
 #define GDM_PAD_EN_MASK			BIT(28)
 #define GDM_DROP_CRC_ERR_MASK		BIT(23)
@@ -883,9 +897,15 @@
 #define QDMA_ETH_RXMSG_SPORT_MASK	GENMASK(25, 21)
 #define QDMA_ETH_RXMSG_CRSN_MASK	GENMASK(20, 16)
 #define QDMA_ETH_RXMSG_PPE_ENTRY_MASK	GENMASK(15, 0)
+/* RX MSG2 */
+#define QDMA_ETH_RXMSG_AGG_COUNT_MASK	GENMASK(31, 24)
+#define QDMA_ETH_RXMSG_L2_LEN_MASK	GENMASK(6, 0)
+/* RX MSG3 */
+#define QDMA_ETH_RXMSG_AGG_LEN_MASK	GENMASK(31, 16)
+#define QDMA_ETH_RXMSG_TCP_WIN_MASK	GENMASK(15, 0)
 
 struct airoha_qdma_desc {
-	__le32 rsv;
+	__le32 tcp_ts_reply;
 	__le32 ctrl;
 	__le32 addr;
 	__le32 data;

---
base-commit: 660a9e399ab02c0cb86d277ed6b0c9d10c350fdd
change-id: 20260520-airoha-eth-lro-a5d1c3631811

Best regards,
-- 
Lorenzo Bianconi <lorenzo@kernel.org>


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH net-next v2] net: airoha: Add TCP LRO support
  2026-06-10 15:00 [PATCH net-next v2] net: airoha: Add TCP LRO support Lorenzo Bianconi
@ 2026-06-12  9:15 ` Lorenzo Bianconi
  2026-06-12 10:05 ` Lorenzo Bianconi
  1 sibling, 0 replies; 3+ messages in thread
From: Lorenzo Bianconi @ 2026-06-12  9:15 UTC (permalink / raw)
  To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni
  Cc: Alexander Lobakin, linux-arm-kernel, linux-mediatek, netdev,
	Madhur Agrawal

[-- Attachment #1: Type: text/plain, Size: 13242 bytes --]

> Add hardware TCP Large Receive Offload (LRO) support to the airoha_eth
> driver, leveraging the EN7581/AN7583 SoC's 8 dedicated LRO hardware queues
> mapped to RX queues 24–31. LRO hw offloading does not support
> Scatter-Gather (SG) so it is required to increase the page_pool allocation
> order to 2 for RX queues 24–31 (LRO queues).
> Since HW LRO is configured per-QDMA and shared across all devices using
> it, LRO is mutually exclusive with multiple active devices on the same
> QDMA block. Call netdev_update_features() on sibling devices in
> ndo_open/ndo_stop so that NETIF_F_LRO availability is re-evaluated when
> the QDMA user count changes.

commenting on sashiko's report:
https://sashiko.dev/#/patchset/20260610-airoha-eth-lro-v2-1-54be99b9a2d5%40kernel.org

> 

[...]

> +static int airoha_qdma_lro_rx_process(struct sk_buff *skb,
> +				      struct airoha_qdma_desc *desc)
> +{
> +	u32 desc_ctrl = le32_to_cpu(READ_ONCE(desc->ctrl));
> +	u32 len, th_off, tcp_ack_seq, agg_count, data_off;
> +	struct skb_shared_info *shinfo = skb_shinfo(skb);
> +	u32 msg1 = le32_to_cpu(READ_ONCE(desc->msg1));
> +	u32 msg2 = le32_to_cpu(READ_ONCE(desc->msg2));
> +	u32 msg3 = le32_to_cpu(READ_ONCE(desc->msg3));
> +	u16 tcp_win, l2_len;
> +	struct tcphdr *th;
> +	bool ipv4, ipv6;
> +
> +	agg_count = FIELD_GET(QDMA_ETH_RXMSG_AGG_COUNT_MASK, msg2);
> +	if (agg_count <= 1)
> +		return 0;
> +
> +	ipv4 = FIELD_GET(QDMA_ETH_RXMSG_IP4_MASK, msg1);
> +	ipv6 = FIELD_GET(QDMA_ETH_RXMSG_IP6_MASK, msg1);
> +	if (!ipv4 && !ipv6)
> +		return -EOPNOTSUPP;
> +
> +	l2_len = FIELD_GET(QDMA_ETH_RXMSG_L2_LEN_MASK, msg2);
> +	len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl);
> +	if (ipv4) {
> +		struct iphdr *iph, _iph;
> +
> +		iph = skb_header_pointer(skb, l2_len, sizeof(*iph), &_iph);
> +		if (!iph)
> +			return -EINVAL;
> +
> +		if (iph->protocol != IPPROTO_TCP)
> +			return -EOPNOTSUPP;
> +
> +		if (iph->ihl < 5)
> +			return -EINVAL;
> +
> +		th_off = l2_len + (iph->ihl << 2);
> +		if (!pskb_may_pull(skb, th_off))
> +			return -EINVAL;
> +
> +		iph = (struct iphdr *)(skb->data + l2_len);
> +		iph->tot_len = cpu_to_be16(len - l2_len);
> +		iph->check = 0;
> +		iph->check = ip_fast_csum((void *)iph, iph->ihl);
> +	} else {
> +		struct ipv6hdr *ip6h;
> +
> +		th_off = l2_len + sizeof(*ip6h);
> +		if (!pskb_may_pull(skb, th_off))
> +			return -EINVAL;
> +
> +		ip6h = (struct ipv6hdr *)(skb->data + l2_len);
> +		if (ip6h->nexthdr != NEXTHDR_TCP)
> +			return -EOPNOTSUPP;
> +
> +		ip6h->payload_len = cpu_to_be16(len - th_off);
> +	}
> +
> +	tcp_win = FIELD_GET(QDMA_ETH_RXMSG_TCP_WIN_MASK, msg3);
> +	tcp_ack_seq = le32_to_cpu(READ_ONCE(desc->data));
> +
> +	if (!pskb_may_pull(skb, th_off + sizeof(*th)))
> +		return -EINVAL;
> +
> +	th = (struct tcphdr *)(skb->data + th_off);

- Does this code validate that the TCP header length is at least 5?
  - ack, I will add it in the next revision.

> +	data_off = th_off + (th->doff << 2);
> +	if (len <= data_off)
> +		return -EINVAL;
> +
> +	th->ack_seq = cpu_to_be32(tcp_ack_seq);
> +	th->window = cpu_to_be16(tcp_win);
> +
> +	/* Check tcp timestamp option */
> +	if (th->doff == (sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4) {
> +		u32 topt;
> +
> +		if (!pskb_may_pull(skb, data_off))
> +			return -EINVAL;
> +
> +		th = (struct tcphdr *)(skb->data + th_off);
> +		topt = get_unaligned_be32(th + 1);
> +		if (topt == ((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
> +			     (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
> +			u8 *ptr = (u8 *)th + sizeof(*th) + 2 * sizeof(__be32);
> +			__le32 tcp_ts_reply = READ_ONCE(desc->tcp_ts_reply);
> +
> +			put_unaligned_be32(le32_to_cpu(tcp_ts_reply), ptr);
> +		}
> +	}
> +
> +	shinfo->gso_type = ipv4 ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
> +	shinfo->gso_size = (len - data_off) / agg_count;
> +	shinfo->gso_segs = agg_count;


- Could this gso_size calculation corrupt TCP sequence numbers during
  forwarding?
  - The TSO in the egress NIC will perform segmentation when the device is
    routing the LRO packet to the destination so the TCP sequence numbers
    will be recalculated. I agree gso_size is just an estimation of the real
    MSS on the first n-1 packets and we can have a suboptimal MSS size on
    egress segmented traffic, but the hw does not provide this info.

- Does this LRO processing need to initialize CHECKSUM_PARTIAL metadata?
  - ack, I will work on it

> +
> +	return 0;
> +}
> +
>  static int airoha_qdma_rx_process(struct airoha_queue *q, int budget)
>  {
>  	enum dma_data_direction dir = page_pool_get_dma_dir(q->page_pool);

[...]

>  
> +static void airoha_update_netdev_features(struct airoha_gdm_dev *dev)
> +{
> +	struct airoha_eth *eth = dev->eth;
> +	int i, j;
> +
> +	for (i = 0; i < ARRAY_SIZE(eth->ports); i++) {
> +		struct airoha_gdm_port *port = eth->ports[i];
> +
> +		if (!port)
> +			continue;
> +
> +		for (j = 0; j < ARRAY_SIZE(port->devs); j++) {
> +			struct airoha_gdm_dev *iter_dev = port->devs[j];
> +			struct net_device *netdev;
> +
> +			if (!iter_dev || iter_dev == dev)
> +				continue;
> +
> +			if (iter_dev->qdma != dev->qdma)
> +				continue;
> +
> +			netdev = netdev_from_priv(iter_dev);
> +			if (netdev->reg_state != NETREG_REGISTERED)
> +				continue;
> +
> +			netdev_update_features(netdev);
> +		}
> +	}
> +}
> +
>  static int airoha_dev_open(struct net_device *netdev)
>  {
>  	int err, len = ETH_HLEN + netdev->mtu + ETH_FCS_LEN;
> @@ -1778,6 +1970,18 @@ static int airoha_dev_open(struct net_device *netdev)
>  	struct airoha_gdm_port *port = dev->port;
>  	u32 cur_len, pse_port = FE_PSE_PORT_PPE1;
>  	struct airoha_qdma *qdma = dev->qdma;
> +	int qdma_id = qdma - &qdma->eth->qdma[0];
> +
> +	/* HW LRO is configured on the QDMA and it is shared between
> +	 * all the devices using it. Refuse to open a second device on
> +	 * the same QDMA if LRO is enabled on any device sharing it.
> +	 */
> +	if (atomic_read(&qdma->users) &&
> +	    airoha_fe_lro_is_enabled(qdma->eth, qdma_id)) {
> +		netdev_warn(netdev, "required to disable LRO on QDMA%d\n",
> +			    qdma_id);
> +		return -EBUSY;
> +	}

- Can a sibling interface that is DOWN bypass this LRO mutual exclusion check?
  - AFAIU the kernel does not run the ndo_set_features() when the device is
    down, just set the requested feature. When we bring up eth1, ndo_open()
    callback will return an error.

Regards,
Lorenzo

>  
>  	netif_tx_start_all_queues(netdev);
>  	err = airoha_set_vip_for_gdm_port(dev, true);
> @@ -1817,6 +2021,8 @@ static int airoha_dev_open(struct net_device *netdev)
>  	airoha_set_gdm_port_fwd_cfg(qdma->eth, REG_GDM_FWD_CFG(port->id),
>  				    pse_port);
>  
> +	airoha_update_netdev_features(dev);
> +
>  	return 0;
>  }
>  
> @@ -1876,6 +2082,8 @@ static int airoha_dev_stop(struct net_device *netdev)
>  		}
>  	}
>  
> +	airoha_update_netdev_features(dev);
> +
>  	return 0;
>  }
>  
> @@ -2154,6 +2362,56 @@ int airoha_get_fe_port(struct airoha_gdm_dev *dev)
>  	}
>  }
>  
> +static netdev_features_t airoha_dev_fix_features(struct net_device *netdev,
> +						 netdev_features_t features)
> +{
> +	struct airoha_gdm_dev *dev = netdev_priv(netdev);
> +	struct airoha_qdma *qdma = dev->qdma;
> +
> +	if (atomic_read(&qdma->users) > 1)
> +		features &= ~NETIF_F_LRO;
> +
> +	return features;
> +}
> +
> +static int airoha_dev_set_features(struct net_device *netdev,
> +				   netdev_features_t features)
> +{
> +	netdev_features_t diff = netdev->features ^ features;
> +	struct airoha_gdm_dev *dev = netdev_priv(netdev);
> +	struct airoha_qdma *qdma = dev->qdma;
> +	struct airoha_eth *eth = qdma->eth;
> +	int qdma_id = qdma - &eth->qdma[0];
> +
> +	if (!(diff & NETIF_F_LRO))
> +		return 0;
> +
> +	if (features & NETIF_F_LRO) {
> +		int i, lro_queue_index = 0;
> +
> +		for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) {
> +			struct airoha_queue *q = &qdma->q_rx[i];
> +			u32 size;
> +
> +			if (!q->ndesc)
> +				continue;
> +
> +			if (!airoha_qdma_is_lro_queue(q))
> +				continue;
> +
> +			size = SKB_WITH_OVERHEAD(AIROHA_RX_LEN(q->buf_size));
> +			size = min_t(u32, size, CDM_LRO_AGG_SIZE_MASK);
> +			airoha_fe_lro_init_rx_queue(eth, qdma_id,
> +						    lro_queue_index, i, size);
> +			lro_queue_index++;
> +		}
> +	} else {
> +		airoha_fe_lro_disable(eth, qdma_id);
> +	}
> +
> +	return 0;
> +}
> +
>  static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
>  				   struct net_device *netdev)
>  {
> @@ -3082,6 +3340,8 @@ static const struct net_device_ops airoha_netdev_ops = {
>  	.ndo_stop		= airoha_dev_stop,
>  	.ndo_change_mtu		= airoha_dev_change_mtu,
>  	.ndo_select_queue	= airoha_dev_select_queue,
> +	.ndo_fix_features	= airoha_dev_fix_features,
> +	.ndo_set_features	= airoha_dev_set_features,
>  	.ndo_start_xmit		= airoha_dev_xmit,
>  	.ndo_get_stats64        = airoha_dev_get_stats64,
>  	.ndo_set_mac_address	= airoha_dev_set_macaddr,
> @@ -3169,11 +3429,9 @@ static int airoha_alloc_gdm_device(struct airoha_eth *eth,
>  	netdev->ethtool_ops = &airoha_ethtool_ops;
>  	netdev->max_mtu = AIROHA_MAX_MTU;
>  	netdev->watchdog_timeo = 5 * HZ;
> -	netdev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO6 |
> -			      NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO |
> -			      NETIF_F_HW_TC;
> -	netdev->features |= netdev->hw_features;
> -	netdev->vlan_features = netdev->hw_features;
> +	netdev->hw_features = AIROHA_HW_FEATURES | NETIF_F_LRO;
> +	netdev->features |= AIROHA_HW_FEATURES;
> +	netdev->vlan_features = AIROHA_HW_FEATURES;
>  	SET_NETDEV_DEV(netdev, eth->dev);
>  
>  	/* reserve hw queues for HTB offloading */
> diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h
> index 8f42973f9cf5..e78ef751f244 100644
> --- a/drivers/net/ethernet/airoha/airoha_eth.h
> +++ b/drivers/net/ethernet/airoha/airoha_eth.h
> @@ -44,6 +44,18 @@
>  	 (_n) == 15 ? 128 :		\
>  	 (_n) ==  0 ? 1024 : 16)
>  
> +#define AIROHA_LRO_PAGE_ORDER		order_base_2(SZ_16K / PAGE_SIZE)
> +#define AIROHA_MAX_NUM_LRO_QUEUES	8
> +#define AIROHA_RXQ_LRO_EN_MASK		GENMASK(31, 24)
> +#define AIROHA_RXQ_LRO_MAX_AGG_COUNT	64
> +#define AIROHA_RXQ_LRO_MAX_AGG_TIME	100
> +#define AIROHA_RXQ_LRO_MAX_AGE_TIME	2000
> +
> +#define AIROHA_HW_FEATURES			\
> +	(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |	\
> +	 NETIF_F_TSO6 | NETIF_F_IPV6_CSUM |	\
> +	 NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_TC)
> +
>  #define PSE_RSV_PAGES			128
>  #define PSE_QUEUE_RSV_PAGES		64
>  
> @@ -672,6 +684,18 @@ static inline bool airoha_is_7583(struct airoha_eth *eth)
>  	return eth->soc->version == 0x7583;
>  }
>  
> +static inline bool airoha_qdma_is_lro_queue(struct airoha_queue *q)
> +{
> +	struct airoha_qdma *qdma = q->qdma;
> +	int qid = q - &qdma->q_rx[0];
> +
> +	/* EN7581 SoC supports at most 8 LRO rx queues */
> +	BUILD_BUG_ON(hweight32(AIROHA_RXQ_LRO_EN_MASK) >
> +		     AIROHA_MAX_NUM_LRO_QUEUES);
> +
> +	return !!(AIROHA_RXQ_LRO_EN_MASK & BIT(qid));
> +}
> +
>  int airoha_get_fe_port(struct airoha_gdm_dev *dev);
>  bool airoha_is_valid_gdm_dev(struct airoha_eth *eth,
>  			     struct airoha_gdm_dev *dev);
> diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h
> index 436f3c8779c1..dfc786583774 100644
> --- a/drivers/net/ethernet/airoha/airoha_regs.h
> +++ b/drivers/net/ethernet/airoha/airoha_regs.h
> @@ -122,6 +122,20 @@
>  #define CDM_CRSN_QSEL_REASON_MASK(_n)	\
>  	GENMASK(4 + (((_n) % 4) << 3),	(((_n) % 4) << 3))
>  
> +#define REG_CDM_LRO_RXQ(_n, _m)		(CDM_BASE(_n) + 0x78 + ((_m) & 0x4))
> +#define LRO_RXQ_MASK(_n)		GENMASK(4 + (((_n) & 0x3) << 3), ((_n) & 0x3) << 3)
> +
> +#define REG_CDM_LRO_EN(_n)		(CDM_BASE(_n) + 0x80)
> +#define LRO_RXQ_EN_MASK			GENMASK(7, 0)
> +
> +#define REG_CDM_LRO_LIMIT(_n)		(CDM_BASE(_n) + 0x84)
> +#define CDM_LRO_AGG_NUM_MASK		GENMASK(23, 16)
> +#define CDM_LRO_AGG_SIZE_MASK		GENMASK(15, 0)
> +
> +#define REG_CDM_LRO_AGE_TIME(_n)	(CDM_BASE(_n) + 0x88)
> +#define CDM_LRO_AGE_TIME_MASK		GENMASK(31, 16)
> +#define CDM_LRO_AGG_TIME_MASK		GENMASK(15, 0)
> +
>  #define REG_GDM_FWD_CFG(_n)		GDM_BASE(_n)
>  #define GDM_PAD_EN_MASK			BIT(28)
>  #define GDM_DROP_CRC_ERR_MASK		BIT(23)
> @@ -883,9 +897,15 @@
>  #define QDMA_ETH_RXMSG_SPORT_MASK	GENMASK(25, 21)
>  #define QDMA_ETH_RXMSG_CRSN_MASK	GENMASK(20, 16)
>  #define QDMA_ETH_RXMSG_PPE_ENTRY_MASK	GENMASK(15, 0)
> +/* RX MSG2 */
> +#define QDMA_ETH_RXMSG_AGG_COUNT_MASK	GENMASK(31, 24)
> +#define QDMA_ETH_RXMSG_L2_LEN_MASK	GENMASK(6, 0)
> +/* RX MSG3 */
> +#define QDMA_ETH_RXMSG_AGG_LEN_MASK	GENMASK(31, 16)
> +#define QDMA_ETH_RXMSG_TCP_WIN_MASK	GENMASK(15, 0)
>  
>  struct airoha_qdma_desc {
> -	__le32 rsv;
> +	__le32 tcp_ts_reply;
>  	__le32 ctrl;
>  	__le32 addr;
>  	__le32 data;
> 
> ---
> base-commit: 660a9e399ab02c0cb86d277ed6b0c9d10c350fdd
> change-id: 20260520-airoha-eth-lro-a5d1c3631811
> 
> Best regards,
> -- 
> Lorenzo Bianconi <lorenzo@kernel.org>
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH net-next v2] net: airoha: Add TCP LRO support
  2026-06-10 15:00 [PATCH net-next v2] net: airoha: Add TCP LRO support Lorenzo Bianconi
  2026-06-12  9:15 ` Lorenzo Bianconi
@ 2026-06-12 10:05 ` Lorenzo Bianconi
  1 sibling, 0 replies; 3+ messages in thread
From: Lorenzo Bianconi @ 2026-06-12 10:05 UTC (permalink / raw)
  To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni
  Cc: Alexander Lobakin, linux-arm-kernel, linux-mediatek, netdev,
	Madhur Agrawal

[-- Attachment #1: Type: text/plain, Size: 16917 bytes --]

> Add hardware TCP Large Receive Offload (LRO) support to the airoha_eth
> driver, leveraging the EN7581/AN7583 SoC's 8 dedicated LRO hardware queues
> mapped to RX queues 24–31. LRO hw offloading does not support
> Scatter-Gather (SG) so it is required to increase the page_pool allocation
> order to 2 for RX queues 24–31 (LRO queues).
> Since HW LRO is configured per-QDMA and shared across all devices using
> it, LRO is mutually exclusive with multiple active devices on the same
> QDMA block. Call netdev_update_features() on sibling devices in
> ndo_open/ndo_stop so that NETIF_F_LRO availability is re-evaluated when
> the QDMA user count changes.
> 
> Performance comparison between GRO and hw LRO has been carried out using
> a 10Gbps NIC:
> GRO: ~2.7 Gbps
> LRO: ~8.1 Gbps
> 
> Tested-by: Madhur Agrawal <madhur.agrawal@airoha.com>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>

commenting on sashiko's report:
https://netdev-ai.bots.linux.dev/sashiko/#/patchset/20260610-airoha-eth-lro-v2-1-54be99b9a2d5%40kernel.org

[...]

>  
> +static int airoha_qdma_lro_rx_process(struct sk_buff *skb,
> +				      struct airoha_qdma_desc *desc)
> +{
> +	u32 desc_ctrl = le32_to_cpu(READ_ONCE(desc->ctrl));
> +	u32 len, th_off, tcp_ack_seq, agg_count, data_off;
> +	struct skb_shared_info *shinfo = skb_shinfo(skb);
> +	u32 msg1 = le32_to_cpu(READ_ONCE(desc->msg1));
> +	u32 msg2 = le32_to_cpu(READ_ONCE(desc->msg2));
> +	u32 msg3 = le32_to_cpu(READ_ONCE(desc->msg3));
> +	u16 tcp_win, l2_len;
> +	struct tcphdr *th;
> +	bool ipv4, ipv6;
> +
> +	agg_count = FIELD_GET(QDMA_ETH_RXMSG_AGG_COUNT_MASK, msg2);
> +	if (agg_count <= 1)
> +		return 0;
> +
> +	ipv4 = FIELD_GET(QDMA_ETH_RXMSG_IP4_MASK, msg1);
> +	ipv6 = FIELD_GET(QDMA_ETH_RXMSG_IP6_MASK, msg1);
> +	if (!ipv4 && !ipv6)
> +		return -EOPNOTSUPP;
> +
> +	l2_len = FIELD_GET(QDMA_ETH_RXMSG_L2_LEN_MASK, msg2);
> +	len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl);
> +	if (ipv4) {
> +		struct iphdr *iph, _iph;
> +
> +		iph = skb_header_pointer(skb, l2_len, sizeof(*iph), &_iph);
> +		if (!iph)
> +			return -EINVAL;
> +
> +		if (iph->protocol != IPPROTO_TCP)
> +			return -EOPNOTSUPP;
> +
> +		if (iph->ihl < 5)
> +			return -EINVAL;
> +
> +		th_off = l2_len + (iph->ihl << 2);
> +		if (!pskb_may_pull(skb, th_off))
> +			return -EINVAL;
> +
> +		iph = (struct iphdr *)(skb->data + l2_len);
> +		iph->tot_len = cpu_to_be16(len - l2_len);
> +		iph->check = 0;
> +		iph->check = ip_fast_csum((void *)iph, iph->ihl);
> +	} else {
> +		struct ipv6hdr *ip6h;
> +
> +		th_off = l2_len + sizeof(*ip6h);
> +		if (!pskb_may_pull(skb, th_off))
> +			return -EINVAL;
> +
> +		ip6h = (struct ipv6hdr *)(skb->data + l2_len);
> +		if (ip6h->nexthdr != NEXTHDR_TCP)
> +			return -EOPNOTSUPP;
> +
> +		ip6h->payload_len = cpu_to_be16(len - th_off);
> +	}

- The IPv6 path only checks ip6h->nexthdr != NEXTHDR_TCP and does not walk
  extension headers (HBH, Routing, Fragment, Destination Options). 
  Can this misclassify any IPv6 packet that the HW happens to mark with
  agg_count > 1 and includes extension headers?
  - AFAIK the hw does not aggregate IPv6 packets with extensions headers

- Similarly, the IPv4 path does not consult QDMA_ETH_RXMSG_IP4F_MASK, so
  IPv4 fragments that the HW reports as aggregated would also fall through
  the protocol check.
  - AFAIK the hw does not aggregate IPv4 fragments

- A related observation: this routine is invoked unconditionally for every
  RX packet from airoha_qdma_rx_process(), with no airoha_qdma_is_lro_queue(q)
  gate.  Should non-LRO RX queues skip this entirely so that HW corner
  cases on those queues cannot accidentally take this path?
  - LRO will just return agg_count = 1 for non LRO queues.

> +
> +	tcp_win = FIELD_GET(QDMA_ETH_RXMSG_TCP_WIN_MASK, msg3);
> +	tcp_ack_seq = le32_to_cpu(READ_ONCE(desc->data));
> +
> +	if (!pskb_may_pull(skb, th_off + sizeof(*th)))
> +		return -EINVAL;
> +
> +	th = (struct tcphdr *)(skb->data + th_off);
> +	data_off = th_off + (th->doff << 2);
> +	if (len <= data_off)
> +		return -EINVAL;
> +
> +	th->ack_seq = cpu_to_be32(tcp_ack_seq);
> +	th->window = cpu_to_be16(tcp_win);
> +
> +	/* Check tcp timestamp option */
> +	if (th->doff == (sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4) {
> +		u32 topt;
> +
> +		if (!pskb_may_pull(skb, data_off))
> +			return -EINVAL;
> +
> +		th = (struct tcphdr *)(skb->data + th_off);
> +		topt = get_unaligned_be32(th + 1);
> +		if (topt == ((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
> +			     (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
> +			u8 *ptr = (u8 *)th + sizeof(*th) + 2 * sizeof(__be32);
> +			__le32 tcp_ts_reply = READ_ONCE(desc->tcp_ts_reply);
> +
> +			put_unaligned_be32(le32_to_cpu(tcp_ts_reply), ptr);
> +		}
> +	}

- Is the timestamp echo-reply refresh intentionally limited to the exact
  doff == 8 case (12 bytes of options consisting of NOP/NOP/Timestamp)?
  - AFAIU HW LRO can only aggregate TCP segments that belong to the same flow
    and have the same TCP option layout across all segments.
    The HW can't aggregate segments with variable layout (e.g. SACK) and they
    will reported with agg_count sets to 1.

> +
> +	shinfo->gso_type = ipv4 ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
> +	shinfo->gso_size = (len - data_off) / agg_count;
> +	shinfo->gso_segs = agg_count;
> +
> +	return 0;
> +}
> +
>  static int airoha_qdma_rx_process(struct airoha_queue *q, int budget)
>  {
>  	enum dma_data_direction dir = page_pool_get_dma_dir(q->page_pool);
> @@ -694,9 +836,17 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget)
>  			__skb_put(q->skb, len);
>  			skb_mark_for_recycle(q->skb);
>  			q->skb->dev = netdev;
> -			q->skb->protocol = eth_type_trans(q->skb, netdev);
>  			q->skb->ip_summed = CHECKSUM_UNNECESSARY;
>  			skb_record_rx_queue(q->skb, qid);
> +
> +			if (airoha_qdma_lro_rx_process(q->skb, desc) < 0) {
> +				netdev->stats.rx_dropped++;
> +				dev_kfree_skb(q->skb);
> +				q->skb = NULL;
> +				continue;
> +			}

- On a non-zero return from airoha_qdma_lro_rx_process(), this drops the
  entire packet and bumps rx_dropped.  Several of the early returns are
  -EOPNOTSUPP for "this packet is not plain TCPv4/TCPv6", which previously
  would have been delivered up the stack normally.
  Would treating -EOPNOTSUPP as "deliver without GSO marking" be safer
  than dropping?  Otherwise any HW corner case that sets agg_count > 1
  on a non-TCP packet becomes a silent drop visible only as rx_dropped.
  - This is not doable since the hardware has already merged multiple packets
    into one, the payload contains data from multiple original segments and
    headers have been rewritten.

- A second concern in the same path: on non-LRO scatter-gather queues,
  if a multi-fragment packet's first buffer is dropped here, q->skb is
  left NULL and the next descriptor's buffer enters the "first buffer"
  branch even though it is actually a continuation fragment.  Can this
  lead to the next fragment being treated as a new packet head?
  - This has not been introduced by this patch.

> +
> +			q->skb->protocol = eth_type_trans(q->skb, netdev);

[...]

> +static void airoha_update_netdev_features(struct airoha_gdm_dev *dev)
> +{
> +	struct airoha_eth *eth = dev->eth;
> +	int i, j;
> +
> +	for (i = 0; i < ARRAY_SIZE(eth->ports); i++) {
> +		struct airoha_gdm_port *port = eth->ports[i];
> +
> +		if (!port)
> +			continue;
> +
> +		for (j = 0; j < ARRAY_SIZE(port->devs); j++) {
> +			struct airoha_gdm_dev *iter_dev = port->devs[j];
> +			struct net_device *netdev;
> +
> +			if (!iter_dev || iter_dev == dev)
> +				continue;
> +
> +			if (iter_dev->qdma != dev->qdma)
> +				continue;
> +
> +			netdev = netdev_from_priv(iter_dev);
> +			if (netdev->reg_state != NETREG_REGISTERED)
> +				continue;
> +
> +			netdev_update_features(netdev);
> +		}
> +	}
> +}
> +
>  static int airoha_dev_open(struct net_device *netdev)
>  {
>  	int err, len = ETH_HLEN + netdev->mtu + ETH_FCS_LEN;
> @@ -1778,6 +1970,18 @@ static int airoha_dev_open(struct net_device *netdev)
>  	struct airoha_gdm_port *port = dev->port;
>  	u32 cur_len, pse_port = FE_PSE_PORT_PPE1;
>  	struct airoha_qdma *qdma = dev->qdma;
> +	int qdma_id = qdma - &qdma->eth->qdma[0];
> +
> +	/* HW LRO is configured on the QDMA and it is shared between
> +	 * all the devices using it. Refuse to open a second device on
> +	 * the same QDMA if LRO is enabled on any device sharing it.
> +	 */
> +	if (atomic_read(&qdma->users) &&
> +	    airoha_fe_lro_is_enabled(qdma->eth, qdma_id)) {
> +		netdev_warn(netdev, "required to disable LRO on QDMA%d\n",
> +			    qdma_id);
> +		return -EBUSY;
> +	}

- Consider this sequence on a single QDMA:
    1. devA opens, devA enables NETIF_F_LRO via ethtool
       -> airoha_fe_lro_init_rx_queue() programs REG_CDM_LRO_EN
    2. devA closes -> airoha_dev_stop() runs
       airoha_dev_stop() does not call airoha_fe_lro_disable() when
       atomic_dec_and_test(&qdma->users) succeeds, so REG_CDM_LRO_EN bits
       remain set in HW after the last user goes away.
    3. devB opens on the same QDMA
       The guard above evaluates atomic_read(&qdma->users) == 0 first, so the
       EBUSY branch is short-circuited and devB opens successfully even though
       the HW is still in LRO mode.  devB will then receive HW-aggregated GSO
       packets despite NETIF_F_LRO not being set on its features.
    4. devA tries to reopen
       Now atomic_read(&qdma->users) == 1 and airoha_fe_lro_is_enabled() still
       returns true, so devA gets EBUSY even though the only "LRO user" is
       stale HW state from its previous incarnation.
       Should airoha_dev_stop() call airoha_fe_lro_disable() when the QDMA
       user count drops to zero, or should the open path reset stale LRO state
       when users == 0?
    - This issue is valid and I will fix in the next revision.

>  
>  	netif_tx_start_all_queues(netdev);
>  	err = airoha_set_vip_for_gdm_port(dev, true);
> @@ -1817,6 +2021,8 @@ static int airoha_dev_open(struct net_device *netdev)
>  	airoha_set_gdm_port_fwd_cfg(qdma->eth, REG_GDM_FWD_CFG(port->id),
>  				    pse_port);
>  
> +	airoha_update_netdev_features(dev);
> +
>  	return 0;
>  }
>  
> @@ -1876,6 +2082,8 @@ static int airoha_dev_stop(struct net_device *netdev)
>  		}
>  	}
>  
> +	airoha_update_netdev_features(dev);
> +
>  	return 0;
>  }
>  
> @@ -2154,6 +2362,56 @@ int airoha_get_fe_port(struct airoha_gdm_dev *dev)
>  	}
>  }
>  
> +static netdev_features_t airoha_dev_fix_features(struct net_device *netdev,
> +						 netdev_features_t features)
> +{
> +	struct airoha_gdm_dev *dev = netdev_priv(netdev);
> +	struct airoha_qdma *qdma = dev->qdma;
> +
> +	if (atomic_read(&qdma->users) > 1)
> +		features &= ~NETIF_F_LRO;
> +
> +	return features;
> +}
> +
> +static int airoha_dev_set_features(struct net_device *netdev,
> +				   netdev_features_t features)
> +{
> +	netdev_features_t diff = netdev->features ^ features;
> +	struct airoha_gdm_dev *dev = netdev_priv(netdev);
> +	struct airoha_qdma *qdma = dev->qdma;
> +	struct airoha_eth *eth = qdma->eth;
> +	int qdma_id = qdma - &eth->qdma[0];
> +
> +	if (!(diff & NETIF_F_LRO))
> +		return 0;
> +
> +	if (features & NETIF_F_LRO) {
> +		int i, lro_queue_index = 0;
> +
> +		for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) {
> +			struct airoha_queue *q = &qdma->q_rx[i];
> +			u32 size;
> +
> +			if (!q->ndesc)
> +				continue;
> +
> +			if (!airoha_qdma_is_lro_queue(q))
> +				continue;
> +
> +			size = SKB_WITH_OVERHEAD(AIROHA_RX_LEN(q->buf_size));
> +			size = min_t(u32, size, CDM_LRO_AGG_SIZE_MASK);
> +			airoha_fe_lro_init_rx_queue(eth, qdma_id,
> +						    lro_queue_index, i, size);
> +			lro_queue_index++;
> +		}
> +	} else {
> +		airoha_fe_lro_disable(eth, qdma_id);
> +	}
> +
> +	return 0;
> +}
> +
>  static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
>  				   struct net_device *netdev)
>  {
> @@ -3082,6 +3340,8 @@ static const struct net_device_ops airoha_netdev_ops = {
>  	.ndo_stop		= airoha_dev_stop,
>  	.ndo_change_mtu		= airoha_dev_change_mtu,
>  	.ndo_select_queue	= airoha_dev_select_queue,
> +	.ndo_fix_features	= airoha_dev_fix_features,
> +	.ndo_set_features	= airoha_dev_set_features,
>  	.ndo_start_xmit		= airoha_dev_xmit,
>  	.ndo_get_stats64        = airoha_dev_get_stats64,
>  	.ndo_set_mac_address	= airoha_dev_set_macaddr,
> @@ -3169,11 +3429,9 @@ static int airoha_alloc_gdm_device(struct airoha_eth *eth,
>  	netdev->ethtool_ops = &airoha_ethtool_ops;
>  	netdev->max_mtu = AIROHA_MAX_MTU;
>  	netdev->watchdog_timeo = 5 * HZ;
> -	netdev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO6 |
> -			      NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO |
> -			      NETIF_F_HW_TC;
> -	netdev->features |= netdev->hw_features;
> -	netdev->vlan_features = netdev->hw_features;
> +	netdev->hw_features = AIROHA_HW_FEATURES | NETIF_F_LRO;
> +	netdev->features |= AIROHA_HW_FEATURES;
> +	netdev->vlan_features = AIROHA_HW_FEATURES;
>  	SET_NETDEV_DEV(netdev, eth->dev);

- Is NETIF_F_LRO the right feature flag here?
  - I guess it is ok to use NETIF_F_GRO_HW here. I will work on it.

Regards,
Lorenzo

>  
>  	/* reserve hw queues for HTB offloading */
> diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h
> index 8f42973f9cf5..e78ef751f244 100644
> --- a/drivers/net/ethernet/airoha/airoha_eth.h
> +++ b/drivers/net/ethernet/airoha/airoha_eth.h
> @@ -44,6 +44,18 @@
>  	 (_n) == 15 ? 128 :		\
>  	 (_n) ==  0 ? 1024 : 16)
>  
> +#define AIROHA_LRO_PAGE_ORDER		order_base_2(SZ_16K / PAGE_SIZE)
> +#define AIROHA_MAX_NUM_LRO_QUEUES	8
> +#define AIROHA_RXQ_LRO_EN_MASK		GENMASK(31, 24)
> +#define AIROHA_RXQ_LRO_MAX_AGG_COUNT	64
> +#define AIROHA_RXQ_LRO_MAX_AGG_TIME	100
> +#define AIROHA_RXQ_LRO_MAX_AGE_TIME	2000
> +
> +#define AIROHA_HW_FEATURES			\
> +	(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |	\
> +	 NETIF_F_TSO6 | NETIF_F_IPV6_CSUM |	\
> +	 NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_TC)
> +
>  #define PSE_RSV_PAGES			128
>  #define PSE_QUEUE_RSV_PAGES		64
>  
> @@ -672,6 +684,18 @@ static inline bool airoha_is_7583(struct airoha_eth *eth)
>  	return eth->soc->version == 0x7583;
>  }
>  
> +static inline bool airoha_qdma_is_lro_queue(struct airoha_queue *q)
> +{
> +	struct airoha_qdma *qdma = q->qdma;
> +	int qid = q - &qdma->q_rx[0];
> +
> +	/* EN7581 SoC supports at most 8 LRO rx queues */
> +	BUILD_BUG_ON(hweight32(AIROHA_RXQ_LRO_EN_MASK) >
> +		     AIROHA_MAX_NUM_LRO_QUEUES);
> +
> +	return !!(AIROHA_RXQ_LRO_EN_MASK & BIT(qid));
> +}
> +
>  int airoha_get_fe_port(struct airoha_gdm_dev *dev);
>  bool airoha_is_valid_gdm_dev(struct airoha_eth *eth,
>  			     struct airoha_gdm_dev *dev);
> diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h
> index 436f3c8779c1..dfc786583774 100644
> --- a/drivers/net/ethernet/airoha/airoha_regs.h
> +++ b/drivers/net/ethernet/airoha/airoha_regs.h
> @@ -122,6 +122,20 @@
>  #define CDM_CRSN_QSEL_REASON_MASK(_n)	\
>  	GENMASK(4 + (((_n) % 4) << 3),	(((_n) % 4) << 3))
>  
> +#define REG_CDM_LRO_RXQ(_n, _m)		(CDM_BASE(_n) + 0x78 + ((_m) & 0x4))
> +#define LRO_RXQ_MASK(_n)		GENMASK(4 + (((_n) & 0x3) << 3), ((_n) & 0x3) << 3)
> +
> +#define REG_CDM_LRO_EN(_n)		(CDM_BASE(_n) + 0x80)
> +#define LRO_RXQ_EN_MASK			GENMASK(7, 0)
> +
> +#define REG_CDM_LRO_LIMIT(_n)		(CDM_BASE(_n) + 0x84)
> +#define CDM_LRO_AGG_NUM_MASK		GENMASK(23, 16)
> +#define CDM_LRO_AGG_SIZE_MASK		GENMASK(15, 0)
> +
> +#define REG_CDM_LRO_AGE_TIME(_n)	(CDM_BASE(_n) + 0x88)
> +#define CDM_LRO_AGE_TIME_MASK		GENMASK(31, 16)
> +#define CDM_LRO_AGG_TIME_MASK		GENMASK(15, 0)
> +
>  #define REG_GDM_FWD_CFG(_n)		GDM_BASE(_n)
>  #define GDM_PAD_EN_MASK			BIT(28)
>  #define GDM_DROP_CRC_ERR_MASK		BIT(23)
> @@ -883,9 +897,15 @@
>  #define QDMA_ETH_RXMSG_SPORT_MASK	GENMASK(25, 21)
>  #define QDMA_ETH_RXMSG_CRSN_MASK	GENMASK(20, 16)
>  #define QDMA_ETH_RXMSG_PPE_ENTRY_MASK	GENMASK(15, 0)
> +/* RX MSG2 */
> +#define QDMA_ETH_RXMSG_AGG_COUNT_MASK	GENMASK(31, 24)
> +#define QDMA_ETH_RXMSG_L2_LEN_MASK	GENMASK(6, 0)
> +/* RX MSG3 */
> +#define QDMA_ETH_RXMSG_AGG_LEN_MASK	GENMASK(31, 16)
> +#define QDMA_ETH_RXMSG_TCP_WIN_MASK	GENMASK(15, 0)
>  
>  struct airoha_qdma_desc {
> -	__le32 rsv;
> +	__le32 tcp_ts_reply;
>  	__le32 ctrl;
>  	__le32 addr;
>  	__le32 data;
> 
> ---
> base-commit: 660a9e399ab02c0cb86d277ed6b0c9d10c350fdd
> change-id: 20260520-airoha-eth-lro-a5d1c3631811
> 
> Best regards,
> -- 
> Lorenzo Bianconi <lorenzo@kernel.org>
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-06-12 10:05 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-10 15:00 [PATCH net-next v2] net: airoha: Add TCP LRO support Lorenzo Bianconi
2026-06-12  9:15 ` Lorenzo Bianconi
2026-06-12 10:05 ` Lorenzo Bianconi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox