* [PATCH RFC net-next v2] net: airoha: Add TCP LRO support
@ 2026-05-26 6:58 Lorenzo Bianconi
2026-05-26 16:01 ` Alexander Lobakin
0 siblings, 1 reply; 4+ messages in thread
From: Lorenzo Bianconi @ 2026-05-26 6:58 UTC (permalink / raw)
To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Lorenzo Bianconi
Cc: linux-arm-kernel, linux-mediatek, netdev, Madhur Agrawal
Add hardware TCP Large Receive Offload (LRO) support to the airoha_eth
driver, leveraging the EN7581/AN7583 SoC's 8 dedicated LRO hardware queues
mapped to RX queues 24–31. LRO hw offloading does not support
Scatter-Gather (SG) so it is required to increase the page_pool allocation
order to 2 for RX queues 24–31 (LRO queues).
Performance comparison between GRO and hw LRO has been carried out using
a 10Gbps NIC:
GRO: ~2.7 Gbps
LRO: ~8.1 Gbps
Please note with respect to the previous implementation, page_pool
allocation order has been reduced from 5 to 2.
Tested-by: Madhur Agrawal <madhur.agrawal@airoha.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
Changes in RFC v2:
- Improve performance by fixing the buf_size computation.
- Fix possible overflow in REG_CDM_LRO_LIMIT() register configuration.
- Require the device not to be running before configuring LRO.
- Fix configuration order in airoha_fe_lro_is_enabled().
- Check skb header length in airoha_qdma_lro_rx_process().
- Do not check net_device feature in airoha_qdma_rx_process() before
executing airoha_qdma_lro_rx_process() but rely on
airoha_qdma_lro_rx_process() logic.
- Fix possible double recycle in airoha_qdma_rx_process() for LRO
packets.
- Always use AIROHA_RXQ_LRO_MAX_AGG_COUNT macro for max LRO aggregated
fragments in airoha_fe_lro_init_rx_queue().
- Link to v1: https://lore.kernel.org/r/20260520-airoha-eth-lro-v1-1-129cc33766e9@kernel.org
---
drivers/net/ethernet/airoha/airoha_eth.c | 217 +++++++++++++++++++++++++++---
drivers/net/ethernet/airoha/airoha_eth.h | 24 ++++
drivers/net/ethernet/airoha/airoha_regs.h | 22 ++-
3 files changed, 247 insertions(+), 16 deletions(-)
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index 6418fe0c9f80..c8e5abe8942a 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -12,6 +12,7 @@
#include <net/dst_metadata.h>
#include <net/page_pool/helpers.h>
#include <net/pkt_cls.h>
+#include <net/tcp.h>
#include <uapi/linux/ppp_defs.h>
#include "airoha_regs.h"
@@ -431,6 +432,48 @@ static void airoha_fe_crsn_qsel_init(struct airoha_eth *eth)
CDM_CRSN_QSEL_Q1));
}
+static void airoha_fe_lro_init_rx_queue(struct airoha_eth *eth, int qdma_id,
+ int lro_queue_index, int qid,
+ int buf_size)
+{
+ int id = qdma_id + 1;
+
+ airoha_fe_rmw(eth, REG_CDM_LRO_LIMIT(id),
+ CDM_LRO_AGG_NUM_MASK | CDM_LRO_AGG_SIZE_MASK,
+ FIELD_PREP(CDM_LRO_AGG_SIZE_MASK, buf_size) |
+ FIELD_PREP(CDM_LRO_AGG_NUM_MASK,
+ AIROHA_RXQ_LRO_MAX_AGG_COUNT));
+ airoha_fe_rmw(eth, REG_CDM_LRO_AGE_TIME(id),
+ CDM_LRO_AGE_TIME_MASK | CDM_LRO_AGG_TIME_MASK,
+ FIELD_PREP(CDM_LRO_AGE_TIME_MASK,
+ AIROHA_RXQ_LRO_MAX_AGE_TIME) |
+ FIELD_PREP(CDM_LRO_AGG_TIME_MASK,
+ AIROHA_RXQ_LRO_MAX_AGG_TIME));
+ airoha_fe_rmw(eth, REG_CDM_LRO_RXQ(id, lro_queue_index),
+ LRO_RXQ_MASK(lro_queue_index),
+ __field_prep(LRO_RXQ_MASK(lro_queue_index), qid));
+ airoha_fe_set(eth, REG_CDM_LRO_EN(id), BIT(lro_queue_index));
+}
+
+static void airoha_fe_lro_disable(struct airoha_eth *eth, int qdma_id)
+{
+ int i, id = qdma_id + 1;
+
+ airoha_fe_clear(eth, REG_CDM_LRO_EN(id), LRO_RXQ_EN_MASK);
+ airoha_fe_clear(eth, REG_CDM_LRO_LIMIT(id),
+ CDM_LRO_AGG_NUM_MASK | CDM_LRO_AGG_SIZE_MASK);
+ airoha_fe_clear(eth, REG_CDM_LRO_AGE_TIME(id),
+ CDM_LRO_AGE_TIME_MASK | CDM_LRO_AGG_TIME_MASK);
+ for (i = 0; i < AIROHA_MAX_NUM_LRO_QUEUES; i++)
+ airoha_fe_clear(eth, REG_CDM_LRO_RXQ(id, i), LRO_RXQ_MASK(i));
+}
+
+static bool airoha_fe_lro_is_enabled(struct airoha_eth *eth, int qdma_id)
+{
+ return airoha_fe_get(eth, REG_CDM_LRO_EN(qdma_id + 1),
+ LRO_RXQ_EN_MASK);
+}
+
static int airoha_fe_init(struct airoha_eth *eth)
{
airoha_fe_maccr_init(eth);
@@ -587,6 +630,85 @@ static int airoha_qdma_get_gdm_port(struct airoha_eth *eth,
return port >= ARRAY_SIZE(eth->ports) ? -EINVAL : port;
}
+static int airoha_qdma_lro_rx_process(struct airoha_queue *q,
+ struct airoha_qdma_desc *desc)
+{
+ u32 desc_ctrl = le32_to_cpu(READ_ONCE(desc->ctrl));
+ u32 msg1 = le32_to_cpu(READ_ONCE(desc->msg1));
+ u32 msg2 = le32_to_cpu(READ_ONCE(desc->msg2));
+ u32 msg3 = le32_to_cpu(READ_ONCE(desc->msg3));
+ struct sk_buff *skb = q->skb;
+ u32 len, th_off, tcp_ack_seq;
+ u16 tcp_win, l2_len;
+ struct tcphdr *th;
+ bool ipv4, ipv6;
+
+ if (FIELD_GET(QDMA_ETH_RXMSG_AGG_COUNT_MASK, msg2) <= 1)
+ return 0;
+
+ ipv4 = FIELD_GET(QDMA_ETH_RXMSG_IP4_MASK, msg1);
+ ipv6 = FIELD_GET(QDMA_ETH_RXMSG_IP6_MASK, msg1);
+ if (!ipv4 && !ipv6)
+ return -EOPNOTSUPP;
+
+ l2_len = FIELD_GET(QDMA_ETH_RXMSG_L2_LEN_MASK, msg2);
+ len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl);
+ if (ipv4) {
+ struct iphdr *iph;
+
+ if (!pskb_may_pull(skb, l2_len + sizeof(*iph)))
+ return -EINVAL;
+
+ iph = (struct iphdr *)(skb->data + l2_len);
+ if (iph->protocol != IPPROTO_TCP)
+ return -EOPNOTSUPP;
+
+ iph->tot_len = cpu_to_be16(len - l2_len);
+ iph->check = 0;
+ iph->check = ip_fast_csum((void *)iph, iph->ihl);
+ th_off = l2_len + (iph->ihl << 2);
+ } else {
+ struct ipv6hdr *ip6h;
+
+ if (!pskb_may_pull(skb, l2_len + sizeof(*ip6h)))
+ return -EINVAL;
+
+ ip6h = (struct ipv6hdr *)(skb->data + l2_len);
+ if (ip6h->nexthdr != NEXTHDR_TCP)
+ return -EOPNOTSUPP;
+
+ ip6h->payload_len = cpu_to_be16(len - l2_len - sizeof(*ip6h));
+ th_off = l2_len + sizeof(*ip6h);
+ }
+
+ tcp_win = FIELD_GET(QDMA_ETH_RXMSG_TCP_WIN_MASK, msg3);
+ tcp_ack_seq = le32_to_cpu(READ_ONCE(desc->data));
+
+ if (!pskb_may_pull(skb, th_off + sizeof(*th)))
+ return -EINVAL;
+
+ th = (struct tcphdr *)(skb->data + th_off);
+ th->ack_seq = cpu_to_be32(tcp_ack_seq);
+ th->window = cpu_to_be16(tcp_win);
+
+ /* Check tcp timestamp option */
+ if (th->doff == (sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4) {
+ __be32 *topt = (__be32 *)(th + 1);
+
+ if (*topt == cpu_to_be32((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP)) {
+ __le32 tcp_ts_reply = READ_ONCE(desc->tcp_ts_reply);
+
+ put_unaligned_be32(le32_to_cpu(tcp_ts_reply),
+ topt + 2);
+ }
+ }
+
+ return 0;
+}
+
static int airoha_qdma_rx_process(struct airoha_queue *q, int budget)
{
enum dma_data_direction dir = page_pool_get_dma_dir(q->page_pool);
@@ -634,11 +756,15 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget)
skb_reserve(q->skb, AIROHA_RX_HEADROOM);
__skb_put(q->skb, len);
- skb_mark_for_recycle(q->skb);
q->skb->dev = port->dev;
- q->skb->protocol = eth_type_trans(q->skb, port->dev);
q->skb->ip_summed = CHECKSUM_UNNECESSARY;
skb_record_rx_queue(q->skb, qid);
+
+ if (airoha_qdma_lro_rx_process(q, desc) < 0)
+ goto free_frag;
+
+ q->skb->protocol = eth_type_trans(q->skb, port->dev);
+ skb_mark_for_recycle(q->skb);
} else { /* scattered frame */
struct skb_shared_info *shinfo = skb_shinfo(q->skb);
int nr_frags = shinfo->nr_frags;
@@ -727,23 +853,18 @@ static int airoha_qdma_rx_napi_poll(struct napi_struct *napi, int budget)
static int airoha_qdma_init_rx_queue(struct airoha_queue *q,
struct airoha_qdma *qdma, int ndesc)
{
- const struct page_pool_params pp_params = {
- .order = 0,
- .pool_size = 256,
+ struct page_pool_params pp_params = {
.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
.dma_dir = DMA_FROM_DEVICE,
- .max_len = PAGE_SIZE,
.nid = NUMA_NO_NODE,
.dev = qdma->eth->dev,
.napi = &q->napi,
};
+ int pp_order, qid = q - &qdma->q_rx[0], thr;
struct airoha_eth *eth = qdma->eth;
- int qid = q - &qdma->q_rx[0], thr;
dma_addr_t dma_addr;
- q->buf_size = PAGE_SIZE / 2;
q->qdma = qdma;
-
q->entry = devm_kzalloc(eth->dev, ndesc * sizeof(*q->entry),
GFP_KERNEL);
if (!q->entry)
@@ -754,6 +875,11 @@ static int airoha_qdma_init_rx_queue(struct airoha_queue *q,
if (!q->desc)
return -ENOMEM;
+ pp_order = airoha_qdma_is_lro_queue(q) ? AIROHA_LRO_PAGE_ORDER : 0;
+ pp_params.order = pp_order;
+ pp_params.pool_size = 256;
+ pp_params.max_len = PAGE_SIZE << pp_order;
+
q->page_pool = page_pool_create(&pp_params);
if (IS_ERR(q->page_pool)) {
int err = PTR_ERR(q->page_pool);
@@ -762,6 +888,8 @@ static int airoha_qdma_init_rx_queue(struct airoha_queue *q,
return err;
}
+ q->buf_size = airoha_qdma_is_lro_queue(q) ? pp_params.max_len
+ : pp_params.max_len / 2;
q->ndesc = ndesc;
netif_napi_add(eth->napi_dev, &q->napi, airoha_qdma_rx_napi_poll);
@@ -1993,6 +2121,67 @@ int airoha_get_fe_port(struct airoha_gdm_port *port)
}
}
+static int airoha_dev_set_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ netdev_features_t diff = dev->features ^ features;
+ struct airoha_gdm_port *port = netdev_priv(dev);
+ struct airoha_qdma *qdma = port->qdma;
+ struct airoha_eth *eth = qdma->eth;
+ int qdma_id = qdma - &eth->qdma[0];
+ int i;
+
+ if (!(diff & NETIF_F_LRO))
+ return 0;
+
+ if (netif_running(dev))
+ return -EBUSY;
+
+ /* reset LRO configuration */
+ if (features & NETIF_F_LRO) {
+ int lro_queue_index = 0;
+
+ if (airoha_fe_lro_is_enabled(eth, qdma_id))
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) {
+ struct airoha_queue *q = &qdma->q_rx[i];
+ u32 size;
+
+ if (!q->ndesc)
+ continue;
+
+ if (!airoha_qdma_is_lro_queue(q))
+ continue;
+
+ size = SKB_WITH_OVERHEAD(AIROHA_RX_LEN(q->buf_size));
+ size = min_t(u32, size, CDM_LRO_AGG_SIZE_MASK);
+ airoha_fe_lro_init_rx_queue(eth, qdma_id,
+ lro_queue_index, i, size);
+ lro_queue_index++;
+ }
+ } else {
+ for (i = 0; i < ARRAY_SIZE(eth->ports); i++) {
+ struct airoha_gdm_port *p = eth->ports[i];
+
+ if (!p)
+ continue;
+
+ if (p->qdma != qdma)
+ continue;
+
+ if (p->dev == dev)
+ continue;
+
+ if (p->dev->features & NETIF_F_LRO)
+ return 0;
+ }
+ airoha_fe_lro_disable(eth, qdma_id);
+ }
+
+ return 0;
+}
+
static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
struct net_device *dev)
{
@@ -2892,6 +3081,7 @@ static const struct net_device_ops airoha_netdev_ops = {
.ndo_stop = airoha_dev_stop,
.ndo_change_mtu = airoha_dev_change_mtu,
.ndo_select_queue = airoha_dev_select_queue,
+ .ndo_set_features = airoha_dev_set_features,
.ndo_start_xmit = airoha_dev_xmit,
.ndo_get_stats64 = airoha_dev_get_stats64,
.ndo_set_mac_address = airoha_dev_set_macaddr,
@@ -2989,12 +3179,9 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth,
dev->ethtool_ops = &airoha_ethtool_ops;
dev->max_mtu = AIROHA_MAX_MTU;
dev->watchdog_timeo = 5 * HZ;
- dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
- NETIF_F_TSO6 | NETIF_F_IPV6_CSUM |
- NETIF_F_SG | NETIF_F_TSO |
- NETIF_F_HW_TC;
- dev->features |= dev->hw_features;
- dev->vlan_features = dev->hw_features;
+ dev->hw_features = AIROHA_HW_FEATURES | NETIF_F_LRO;
+ dev->features |= AIROHA_HW_FEATURES;
+ dev->vlan_features = AIROHA_HW_FEATURES;
dev->dev.of_node = np;
SET_NETDEV_DEV(dev, eth->dev);
diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h
index d3781103abb5..aed5077d3db5 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.h
+++ b/drivers/net/ethernet/airoha/airoha_eth.h
@@ -43,6 +43,18 @@
(_n) == 15 ? 128 : \
(_n) == 0 ? 1024 : 16)
+#define AIROHA_LRO_PAGE_ORDER 2
+#define AIROHA_MAX_NUM_LRO_QUEUES 8
+#define AIROHA_RXQ_LRO_EN_MASK 0xff000000
+#define AIROHA_RXQ_LRO_MAX_AGG_COUNT 64
+#define AIROHA_RXQ_LRO_MAX_AGG_TIME 100
+#define AIROHA_RXQ_LRO_MAX_AGE_TIME 2000 /* 1ms */
+
+#define AIROHA_HW_FEATURES \
+ (NETIF_F_IP_CSUM | NETIF_F_RXCSUM | \
+ NETIF_F_TSO6 | NETIF_F_IPV6_CSUM | \
+ NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_TC)
+
#define PSE_RSV_PAGES 128
#define PSE_QUEUE_RSV_PAGES 64
@@ -661,6 +673,18 @@ static inline bool airoha_is_7583(struct airoha_eth *eth)
return eth->soc->version == 0x7583;
}
+static inline bool airoha_qdma_is_lro_queue(struct airoha_queue *q)
+{
+ struct airoha_qdma *qdma = q->qdma;
+ int qid = q - &qdma->q_rx[0];
+
+ /* EN7581 SoC supports at most 8 LRO rx queues */
+ BUILD_BUG_ON(hweight32(AIROHA_RXQ_LRO_EN_MASK) >
+ AIROHA_MAX_NUM_LRO_QUEUES);
+
+ return !!(AIROHA_RXQ_LRO_EN_MASK & BIT(qid));
+}
+
int airoha_get_fe_port(struct airoha_gdm_port *port);
bool airoha_is_valid_gdm_port(struct airoha_eth *eth,
struct airoha_gdm_port *port);
diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h
index 436f3c8779c1..dfc786583774 100644
--- a/drivers/net/ethernet/airoha/airoha_regs.h
+++ b/drivers/net/ethernet/airoha/airoha_regs.h
@@ -122,6 +122,20 @@
#define CDM_CRSN_QSEL_REASON_MASK(_n) \
GENMASK(4 + (((_n) % 4) << 3), (((_n) % 4) << 3))
+#define REG_CDM_LRO_RXQ(_n, _m) (CDM_BASE(_n) + 0x78 + ((_m) & 0x4))
+#define LRO_RXQ_MASK(_n) GENMASK(4 + (((_n) & 0x3) << 3), ((_n) & 0x3) << 3)
+
+#define REG_CDM_LRO_EN(_n) (CDM_BASE(_n) + 0x80)
+#define LRO_RXQ_EN_MASK GENMASK(7, 0)
+
+#define REG_CDM_LRO_LIMIT(_n) (CDM_BASE(_n) + 0x84)
+#define CDM_LRO_AGG_NUM_MASK GENMASK(23, 16)
+#define CDM_LRO_AGG_SIZE_MASK GENMASK(15, 0)
+
+#define REG_CDM_LRO_AGE_TIME(_n) (CDM_BASE(_n) + 0x88)
+#define CDM_LRO_AGE_TIME_MASK GENMASK(31, 16)
+#define CDM_LRO_AGG_TIME_MASK GENMASK(15, 0)
+
#define REG_GDM_FWD_CFG(_n) GDM_BASE(_n)
#define GDM_PAD_EN_MASK BIT(28)
#define GDM_DROP_CRC_ERR_MASK BIT(23)
@@ -883,9 +897,15 @@
#define QDMA_ETH_RXMSG_SPORT_MASK GENMASK(25, 21)
#define QDMA_ETH_RXMSG_CRSN_MASK GENMASK(20, 16)
#define QDMA_ETH_RXMSG_PPE_ENTRY_MASK GENMASK(15, 0)
+/* RX MSG2 */
+#define QDMA_ETH_RXMSG_AGG_COUNT_MASK GENMASK(31, 24)
+#define QDMA_ETH_RXMSG_L2_LEN_MASK GENMASK(6, 0)
+/* RX MSG3 */
+#define QDMA_ETH_RXMSG_AGG_LEN_MASK GENMASK(31, 16)
+#define QDMA_ETH_RXMSG_TCP_WIN_MASK GENMASK(15, 0)
struct airoha_qdma_desc {
- __le32 rsv;
+ __le32 tcp_ts_reply;
__le32 ctrl;
__le32 addr;
__le32 data;
---
base-commit: 3baa7ba4ab98af452925926ffc2ee58c683e3f28
change-id: 20260520-airoha-eth-lro-a5d1c3631811
Best regards,
--
Lorenzo Bianconi <lorenzo@kernel.org>
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH RFC net-next v2] net: airoha: Add TCP LRO support
2026-05-26 6:58 [PATCH RFC net-next v2] net: airoha: Add TCP LRO support Lorenzo Bianconi
@ 2026-05-26 16:01 ` Alexander Lobakin
2026-05-26 21:08 ` Lorenzo Bianconi
0 siblings, 1 reply; 4+ messages in thread
From: Alexander Lobakin @ 2026-05-26 16:01 UTC (permalink / raw)
To: Lorenzo Bianconi
Cc: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, linux-arm-kernel, linux-mediatek, netdev,
Madhur Agrawal
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 26 May 2026 08:58:05 +0200
> Add hardware TCP Large Receive Offload (LRO) support to the airoha_eth
> driver, leveraging the EN7581/AN7583 SoC's 8 dedicated LRO hardware queues
> mapped to RX queues 24–31. LRO hw offloading does not support
> Scatter-Gather (SG) so it is required to increase the page_pool allocation
> order to 2 for RX queues 24–31 (LRO queues).
>
> Performance comparison between GRO and hw LRO has been carried out using
> a 10Gbps NIC:
>
> GRO: ~2.7 Gbps
> LRO: ~8.1 Gbps
>
> Please note with respect to the previous implementation, page_pool
> allocation order has been reduced from 5 to 2.
>
> Tested-by: Madhur Agrawal <madhur.agrawal@airoha.com>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
[...]
> @@ -587,6 +630,85 @@ static int airoha_qdma_get_gdm_port(struct airoha_eth *eth,
> return port >= ARRAY_SIZE(eth->ports) ? -EINVAL : port;
> }
>
> +static int airoha_qdma_lro_rx_process(struct airoha_queue *q,
> + struct airoha_qdma_desc *desc)
> +{
> + u32 desc_ctrl = le32_to_cpu(READ_ONCE(desc->ctrl));
> + u32 msg1 = le32_to_cpu(READ_ONCE(desc->msg1));
> + u32 msg2 = le32_to_cpu(READ_ONCE(desc->msg2));
> + u32 msg3 = le32_to_cpu(READ_ONCE(desc->msg3));
Why are these READ_ONCE()s needed? Does desc come from the HW (sorry I
didn't follow the whole code flow) or...?
> + struct sk_buff *skb = q->skb;
> + u32 len, th_off, tcp_ack_seq;
> + u16 tcp_win, l2_len;
> + struct tcphdr *th;
> + bool ipv4, ipv6;
> +
> + if (FIELD_GET(QDMA_ETH_RXMSG_AGG_COUNT_MASK, msg2) <= 1)
> + return 0;
> +
> + ipv4 = FIELD_GET(QDMA_ETH_RXMSG_IP4_MASK, msg1);
> + ipv6 = FIELD_GET(QDMA_ETH_RXMSG_IP6_MASK, msg1);
> + if (!ipv4 && !ipv6)
> + return -EOPNOTSUPP;
> +
> + l2_len = FIELD_GET(QDMA_ETH_RXMSG_L2_LEN_MASK, msg2);
> + len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl);
> + if (ipv4) {
> + struct iphdr *iph;
> +
> + if (!pskb_may_pull(skb, l2_len + sizeof(*iph)))
> + return -EINVAL;
> +
> + iph = (struct iphdr *)(skb->data + l2_len);
> + if (iph->protocol != IPPROTO_TCP)
> + return -EOPNOTSUPP;
> +
> + iph->tot_len = cpu_to_be16(len - l2_len);
> + iph->check = 0;
> + iph->check = ip_fast_csum((void *)iph, iph->ihl);
> + th_off = l2_len + (iph->ihl << 2);
> + } else {
> + struct ipv6hdr *ip6h;
> +
> + if (!pskb_may_pull(skb, l2_len + sizeof(*ip6h)))
> + return -EINVAL;
> +
> + ip6h = (struct ipv6hdr *)(skb->data + l2_len);
> + if (ip6h->nexthdr != NEXTHDR_TCP)
> + return -EOPNOTSUPP;
> +
> + ip6h->payload_len = cpu_to_be16(len - l2_len - sizeof(*ip6h));
> + th_off = l2_len + sizeof(*ip6h);
> + }
> +
> + tcp_win = FIELD_GET(QDMA_ETH_RXMSG_TCP_WIN_MASK, msg3);
> + tcp_ack_seq = le32_to_cpu(READ_ONCE(desc->data));
> +
> + if (!pskb_may_pull(skb, th_off + sizeof(*th)))
> + return -EINVAL;
> +
> + th = (struct tcphdr *)(skb->data + th_off);
> + th->ack_seq = cpu_to_be32(tcp_ack_seq);
> + th->window = cpu_to_be16(tcp_win);
> +
> + /* Check tcp timestamp option */
> + if (th->doff == (sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4) {
> + __be32 *topt = (__be32 *)(th + 1);
Make sure you checked the code with sparse (sometimes it's needed to
mark casts as __force, not this one tho)
> +
> + if (*topt == cpu_to_be32((TCPOPT_NOP << 24) |
Shouldn't this be `((u32)TCPOPT_NOP) << 24` to avoid sign issues?
> + (TCPOPT_NOP << 16) |
> + (TCPOPT_TIMESTAMP << 8) |
> + TCPOLEN_TIMESTAMP)) {
> + __le32 tcp_ts_reply = READ_ONCE(desc->tcp_ts_reply);
> +
> + put_unaligned_be32(le32_to_cpu(tcp_ts_reply),
> + topt + 2);
> + }
> + }
> +
> + return 0;
> +}
Thanks,
Olek
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH RFC net-next v2] net: airoha: Add TCP LRO support
2026-05-26 16:01 ` Alexander Lobakin
@ 2026-05-26 21:08 ` Lorenzo Bianconi
2026-05-27 14:12 ` Alexander Lobakin
0 siblings, 1 reply; 4+ messages in thread
From: Lorenzo Bianconi @ 2026-05-26 21:08 UTC (permalink / raw)
To: Alexander Lobakin
Cc: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, linux-arm-kernel, linux-mediatek, netdev,
Madhur Agrawal
[-- Attachment #1: Type: text/plain, Size: 4912 bytes --]
> From: Lorenzo Bianconi <lorenzo@kernel.org>
> Date: Tue, 26 May 2026 08:58:05 +0200
>
> > Add hardware TCP Large Receive Offload (LRO) support to the airoha_eth
> > driver, leveraging the EN7581/AN7583 SoC's 8 dedicated LRO hardware queues
> > mapped to RX queues 24–31. LRO hw offloading does not support
> > Scatter-Gather (SG) so it is required to increase the page_pool allocation
> > order to 2 for RX queues 24–31 (LRO queues).
> >
> > Performance comparison between GRO and hw LRO has been carried out using
> > a 10Gbps NIC:
> >
> > GRO: ~2.7 Gbps
> > LRO: ~8.1 Gbps
> >
> > Please note with respect to the previous implementation, page_pool
> > allocation order has been reduced from 5 to 2.
> >
> > Tested-by: Madhur Agrawal <madhur.agrawal@airoha.com>
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
>
> [...]
>
> > @@ -587,6 +630,85 @@ static int airoha_qdma_get_gdm_port(struct airoha_eth *eth,
> > return port >= ARRAY_SIZE(eth->ports) ? -EINVAL : port;
> > }
> >
> > +static int airoha_qdma_lro_rx_process(struct airoha_queue *q,
> > + struct airoha_qdma_desc *desc)
> > +{
> > + u32 desc_ctrl = le32_to_cpu(READ_ONCE(desc->ctrl));
> > + u32 msg1 = le32_to_cpu(READ_ONCE(desc->msg1));
> > + u32 msg2 = le32_to_cpu(READ_ONCE(desc->msg2));
> > + u32 msg3 = le32_to_cpu(READ_ONCE(desc->msg3));
>
> Why are these READ_ONCE()s needed? Does desc come from the HW (sorry I
> didn't follow the whole code flow) or...?
Correct, ctrl, msg1, msg2 and msg3 are subfields of the DMA descriptor read by
airoha_qdma_rx_process() from the NIC. I guess here we have a similar issue as
the one fixed in [0]
[0] https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/commit/?id=4ae0604a0673e11e2075b178387151fcad5111b5
>
> > + struct sk_buff *skb = q->skb;
> > + u32 len, th_off, tcp_ack_seq;
> > + u16 tcp_win, l2_len;
> > + struct tcphdr *th;
> > + bool ipv4, ipv6;
> > +
> > + if (FIELD_GET(QDMA_ETH_RXMSG_AGG_COUNT_MASK, msg2) <= 1)
> > + return 0;
> > +
> > + ipv4 = FIELD_GET(QDMA_ETH_RXMSG_IP4_MASK, msg1);
> > + ipv6 = FIELD_GET(QDMA_ETH_RXMSG_IP6_MASK, msg1);
> > + if (!ipv4 && !ipv6)
> > + return -EOPNOTSUPP;
> > +
> > + l2_len = FIELD_GET(QDMA_ETH_RXMSG_L2_LEN_MASK, msg2);
> > + len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl);
> > + if (ipv4) {
> > + struct iphdr *iph;
> > +
> > + if (!pskb_may_pull(skb, l2_len + sizeof(*iph)))
> > + return -EINVAL;
> > +
> > + iph = (struct iphdr *)(skb->data + l2_len);
> > + if (iph->protocol != IPPROTO_TCP)
> > + return -EOPNOTSUPP;
> > +
> > + iph->tot_len = cpu_to_be16(len - l2_len);
> > + iph->check = 0;
> > + iph->check = ip_fast_csum((void *)iph, iph->ihl);
> > + th_off = l2_len + (iph->ihl << 2);
> > + } else {
> > + struct ipv6hdr *ip6h;
> > +
> > + if (!pskb_may_pull(skb, l2_len + sizeof(*ip6h)))
> > + return -EINVAL;
> > +
> > + ip6h = (struct ipv6hdr *)(skb->data + l2_len);
> > + if (ip6h->nexthdr != NEXTHDR_TCP)
> > + return -EOPNOTSUPP;
> > +
> > + ip6h->payload_len = cpu_to_be16(len - l2_len - sizeof(*ip6h));
> > + th_off = l2_len + sizeof(*ip6h);
> > + }
> > +
> > + tcp_win = FIELD_GET(QDMA_ETH_RXMSG_TCP_WIN_MASK, msg3);
> > + tcp_ack_seq = le32_to_cpu(READ_ONCE(desc->data));
> > +
> > + if (!pskb_may_pull(skb, th_off + sizeof(*th)))
> > + return -EINVAL;
> > +
> > + th = (struct tcphdr *)(skb->data + th_off);
> > + th->ack_seq = cpu_to_be32(tcp_ack_seq);
> > + th->window = cpu_to_be16(tcp_win);
> > +
> > + /* Check tcp timestamp option */
> > + if (th->doff == (sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4) {
> > + __be32 *topt = (__be32 *)(th + 1);
>
> Make sure you checked the code with sparse (sometimes it's needed to
> mark casts as __force, not this one tho)
$ make C=2 CHECK=sparse drivers/net/ethernet/airoha/
CHECK scripts/mod/empty.c
DESCEND objtool
INSTALL libsubcmd_headers
DESCEND bpf/resolve_btfids
INSTALL libsubcmd_headers
CHECK drivers/net/ethernet/airoha/airoha_eth.c
CHECK drivers/net/ethernet/airoha/airoha_ppe.c
CHECK drivers/net/ethernet/airoha/airoha_ppe_debugfs.c
CHECK drivers/net/ethernet/airoha/airoha_npu.c
$ sparse --version
v0.6.5-rc1
>
> > +
> > + if (*topt == cpu_to_be32((TCPOPT_NOP << 24) |
>
> Shouldn't this be `((u32)TCPOPT_NOP) << 24` to avoid sign issues?
I guess this is the same approach used in [1]. Am I missing something?
[1] https://github.com/torvalds/linux/blob/master/net/ipv4/tcp_ipv4.c#L823
Regards,
Lorenzo
>
> > + (TCPOPT_NOP << 16) |
> > + (TCPOPT_TIMESTAMP << 8) |
> > + TCPOLEN_TIMESTAMP)) {
> > + __le32 tcp_ts_reply = READ_ONCE(desc->tcp_ts_reply);
> > +
> > + put_unaligned_be32(le32_to_cpu(tcp_ts_reply),
> > + topt + 2);
> > + }
> > + }
> > +
> > + return 0;
> > +}
> Thanks,
> Olek
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH RFC net-next v2] net: airoha: Add TCP LRO support
2026-05-26 21:08 ` Lorenzo Bianconi
@ 2026-05-27 14:12 ` Alexander Lobakin
0 siblings, 0 replies; 4+ messages in thread
From: Alexander Lobakin @ 2026-05-27 14:12 UTC (permalink / raw)
To: Lorenzo Bianconi
Cc: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, linux-arm-kernel, linux-mediatek, netdev,
Madhur Agrawal
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 26 May 2026 23:08:14 +0200
>> From: Lorenzo Bianconi <lorenzo@kernel.org>
>> Date: Tue, 26 May 2026 08:58:05 +0200
[...]
>>> @@ -587,6 +630,85 @@ static int airoha_qdma_get_gdm_port(struct airoha_eth *eth,
>>> return port >= ARRAY_SIZE(eth->ports) ? -EINVAL : port;
>>> }
>>>
>>> +static int airoha_qdma_lro_rx_process(struct airoha_queue *q,
>>> + struct airoha_qdma_desc *desc)
>>> +{
>>> + u32 desc_ctrl = le32_to_cpu(READ_ONCE(desc->ctrl));
>>> + u32 msg1 = le32_to_cpu(READ_ONCE(desc->msg1));
>>> + u32 msg2 = le32_to_cpu(READ_ONCE(desc->msg2));
>>> + u32 msg3 = le32_to_cpu(READ_ONCE(desc->msg3));
>>
>> Why are these READ_ONCE()s needed? Does desc come from the HW (sorry I
>> didn't follow the whole code flow) or...?
>
> Correct, ctrl, msg1, msg2 and msg3 are subfields of the DMA descriptor read by
> airoha_qdma_rx_process() from the NIC. I guess here we have a similar issue as
> the one fixed in [0]
>
> [0] https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/commit/?id=4ae0604a0673e11e2075b178387151fcad5111b5
Hmm, I never believed that we need READ_ONCE()s when reading HW
descriptors, I always thought dma_alloc_coherent() is enough.
But maybe I've been mistaken...
[...]
>>> +
>>> + if (*topt == cpu_to_be32((TCPOPT_NOP << 24) |
>>
>> Shouldn't this be `((u32)TCPOPT_NOP) << 24` to avoid sign issues?
>
> I guess this is same approach used in [1]. Am I missing something?
>
> [1] https://github.com/torvalds/linux/blob/master/net/ipv4/tcp_ipv4.c#L823
Yeah I also noticed this in one more patch when scrolling netdev
yesterday. I guess it's okay here since TCPOPT_NOP is small and
constant and doesn't reach bit 31.
>
> Regards,
> Lorenzo
Thanks,
Olek
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2026-05-27 14:15 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-26 6:58 [PATCH RFC net-next v2] net: airoha: Add TCP LRO support Lorenzo Bianconi
2026-05-26 16:01 ` Alexander Lobakin
2026-05-26 21:08 ` Lorenzo Bianconi
2026-05-27 14:12 ` Alexander Lobakin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox