public inbox for dev@dpdk.org
 help / color / mirror / Atom feed
From: scott.k.mitch1@gmail.com
To: dev@dpdk.org
Cc: stephen@networkplumber.org, Scott Mitchell <scott.k.mitch1@gmail.com>
Subject: [PATCH v4 4/4] net/af_packet: add software checksum offload support
Date: Mon,  2 Feb 2026 00:14:56 -0800	[thread overview]
Message-ID: <20260202081456.4322-5-scott.k.mitch1@gmail.com> (raw)
In-Reply-To: <20260202081456.4322-1-scott.k.mitch1@gmail.com>

From: Scott Mitchell <scott.k.mitch1@gmail.com>

Add software checksum offload support and configurable TX poll
behavior to improve flexibility and performance.

Add rte_net_ip_udptcp_cksum_mbuf to rte_net.h, shared between
rte_eth_tap and rte_eth_af_packet. It computes IPv4/UDP/TCP
checksums in software, since these drivers support neither
hardware checksum offload nor context propagation.

Signed-off-by: Scott Mitchell <scott.k.mitch1@gmail.com>
---
 doc/guides/nics/features/afpacket.ini     |  2 +
 doc/guides/rel_notes/release_26_03.rst    |  2 +
 drivers/net/af_packet/rte_eth_af_packet.c | 15 +++-
 drivers/net/tap/rte_eth_tap.c             | 61 +--------------
 lib/net/rte_net.h                         | 92 +++++++++++++++++++++++
 5 files changed, 112 insertions(+), 60 deletions(-)

diff --git a/doc/guides/nics/features/afpacket.ini b/doc/guides/nics/features/afpacket.ini
index 391f79b173..4bb81c84ff 100644
--- a/doc/guides/nics/features/afpacket.ini
+++ b/doc/guides/nics/features/afpacket.ini
@@ -7,5 +7,7 @@
 Link status          = Y
 Promiscuous mode     = Y
 MTU update           = Y
+L3 checksum offload  = Y
+L4 checksum offload  = Y
 Basic stats          = Y
 Stats per queue      = Y
diff --git a/doc/guides/rel_notes/release_26_03.rst b/doc/guides/rel_notes/release_26_03.rst
index 3b6be19645..2946acce99 100644
--- a/doc/guides/rel_notes/release_26_03.rst
+++ b/doc/guides/rel_notes/release_26_03.rst
@@ -60,6 +60,8 @@ New Features
   * Fixed kernel memory barrier protocol for memory availability
   * Fixed shared memory frame overhead offset calculation
   * Added ``txpollnotrdy`` devarg to avoid ``poll()`` blocking calls
+  * Added checksum offload support for ``IPV4_CKSUM``, ``UDP_CKSUM``,
+    and ``TCP_CKSUM``
 
 Removed Items
 -------------
diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 9df1b1fd4c..128f93bec6 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -10,6 +10,8 @@
 #include <rte_string_fns.h>
 #include <rte_mbuf.h>
 #include <rte_atomic.h>
+#include <rte_ip.h>
+#include <rte_net.h>
 #include <rte_bitops.h>
 #include <ethdev_driver.h>
 #include <ethdev_vdev.h>
@@ -101,6 +103,7 @@ struct pmd_internals {
 	struct pkt_tx_queue *tx_queue;
 	uint8_t vlan_strip;
 	uint8_t timestamp_offloading;
+	bool tx_sw_cksum;
 };
 
 static const char *valid_arguments[] = {
@@ -311,6 +314,9 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		ppd->tp_len = mbuf->pkt_len;
 		ppd->tp_snaplen = mbuf->pkt_len;
 
+		if (pkt_q->sw_cksum && !rte_net_ip_udptcp_cksum_mbuf(mbuf, false))
+			continue;
+
 		struct rte_mbuf *tmp_mbuf = mbuf;
 		do {
 			uint16_t data_len = rte_pktmbuf_data_len(tmp_mbuf);
@@ -396,10 +402,13 @@ eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
 {
 	struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
 	const struct rte_eth_rxmode *rxmode = &dev_conf->rxmode;
+	const struct rte_eth_txmode *txmode = &dev_conf->txmode;
 	struct pmd_internals *internals = dev->data->dev_private;
 
 	internals->vlan_strip = !!(rxmode->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
 	internals->timestamp_offloading = !!(rxmode->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP);
+	internals->tx_sw_cksum = !!(txmode->offloads & (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
+			RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM));
 	return 0;
 }
 
@@ -417,7 +426,10 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	dev_info->max_tx_queues = (uint16_t)internals->nb_queues;
 	dev_info->min_rx_bufsize = ETH_AF_PACKET_ETH_OVERHEAD;
 	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
-		RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
+		RTE_ETH_TX_OFFLOAD_VLAN_INSERT |
+		RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
+		RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
+		RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
 	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP |
 		RTE_ETH_RX_OFFLOAD_TIMESTAMP;
 
@@ -618,6 +630,7 @@ eth_tx_queue_setup(struct rte_eth_dev *dev,
 {
 
 	struct pmd_internals *internals = dev->data->dev_private;
+	internals->tx_queue[tx_queue_id].sw_cksum = internals->tx_sw_cksum;
 
 	dev->data->tx_queues[tx_queue_id] = &internals->tx_queue[tx_queue_id];
 	return 0;
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 730f1859bd..55f496babe 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -560,70 +560,13 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
 		if (txq->csum && (mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM ||
 				l4_ol_flags == RTE_MBUF_F_TX_UDP_CKSUM ||
 				l4_ol_flags == RTE_MBUF_F_TX_TCP_CKSUM)) {
-			unsigned int hdrlens = mbuf->l2_len + mbuf->l3_len;
-			uint16_t *l4_cksum;
-			void *l3_hdr;
-
-			if (l4_ol_flags == RTE_MBUF_F_TX_UDP_CKSUM)
-				hdrlens += sizeof(struct rte_udp_hdr);
-			else if (l4_ol_flags == RTE_MBUF_F_TX_TCP_CKSUM)
-				hdrlens += sizeof(struct rte_tcp_hdr);
-			else if (l4_ol_flags != RTE_MBUF_F_TX_L4_NO_CKSUM)
-				return -1;
-
-			/* Support only packets with at least layer 4
-			 * header included in the first segment
-			 */
-			if (rte_pktmbuf_data_len(mbuf) < hdrlens)
-				return -1;
-
-			/* To change checksums (considering that a mbuf can be
-			 * indirect, for example), copy l2, l3 and l4 headers
-			 * in a new segment and chain it to existing data
-			 */
-			seg = rte_pktmbuf_copy(mbuf, mbuf->pool, 0, hdrlens);
+			/* Compute checksums in software, copying headers if needed */
+			seg = rte_net_ip_udptcp_cksum_mbuf(mbuf, true);
 			if (seg == NULL)
 				return -1;
-			rte_pktmbuf_adj(mbuf, hdrlens);
-			rte_pktmbuf_chain(seg, mbuf);
 			pmbufs[i] = mbuf = seg;
-
-			l3_hdr = rte_pktmbuf_mtod_offset(mbuf, void *, mbuf->l2_len);
-			if (mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
-				struct rte_ipv4_hdr *iph = l3_hdr;
-
-				iph->hdr_checksum = 0;
-				iph->hdr_checksum = rte_ipv4_cksum(iph);
-			}
-
-			if (l4_ol_flags == RTE_MBUF_F_TX_L4_NO_CKSUM)
-				goto skip_l4_cksum;
-
-			if (l4_ol_flags == RTE_MBUF_F_TX_UDP_CKSUM) {
-				struct rte_udp_hdr *udp_hdr;
-
-				udp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *,
-					mbuf->l2_len + mbuf->l3_len);
-				l4_cksum = &udp_hdr->dgram_cksum;
-			} else {
-				struct rte_tcp_hdr *tcp_hdr;
-
-				tcp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *,
-					mbuf->l2_len + mbuf->l3_len);
-				l4_cksum = &tcp_hdr->cksum;
-			}
-
-			*l4_cksum = 0;
-			if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) {
-				*l4_cksum = rte_ipv4_udptcp_cksum_mbuf(mbuf, l3_hdr,
-					mbuf->l2_len + mbuf->l3_len);
-			} else {
-				*l4_cksum = rte_ipv6_udptcp_cksum_mbuf(mbuf, l3_hdr,
-					mbuf->l2_len + mbuf->l3_len);
-			}
 		}
 
-skip_l4_cksum:
 		for (j = 0; j < mbuf->nb_segs; j++) {
 			iovecs[k].iov_len = rte_pktmbuf_data_len(seg);
 			iovecs[k].iov_base = rte_pktmbuf_mtod(seg, void *);
diff --git a/lib/net/rte_net.h b/lib/net/rte_net.h
index 65d724b84b..44f42010c8 100644
--- a/lib/net/rte_net.h
+++ b/lib/net/rte_net.h
@@ -246,6 +246,98 @@ rte_net_intel_cksum_prepare(struct rte_mbuf *m)
 	return rte_net_intel_cksum_flags_prepare(m, m->ol_flags);
 }
 
+/**
+ * Compute IPv4 header and UDP/TCP checksums in software.
+ *
+ * Computes checksums based on mbuf offload flags:
+ * - RTE_MBUF_F_TX_IP_CKSUM: Compute IPv4 header checksum
+ * - RTE_MBUF_F_TX_UDP_CKSUM: Compute UDP checksum (IPv4 or IPv6)
+ * - RTE_MBUF_F_TX_TCP_CKSUM: Compute TCP checksum (IPv4 or IPv6)
+ *
+ * @param mbuf
+ *   The packet mbuf. Must have l2_len and l3_len set correctly.
+ * @param copy
+ *   If true, copy L2/L3/L4 headers to a new segment before computing
+ *   checksums. This is safe for indirect mbufs but has overhead.
+ *   If false, compute checksums in place. This is only safe if the
+ *   mbuf will be copied afterward (e.g., to a device ring buffer).
+ * @return
+ *   - On success: Returns mbuf (new segment if copy=true, original if copy=false)
+ *   - On error: Returns NULL (allocation failed or malformed packet)
+ */
+__rte_experimental
+static inline struct rte_mbuf *
+rte_net_ip_udptcp_cksum_mbuf(struct rte_mbuf *mbuf, bool copy)
+{
+	const uint64_t l4_ol_flags = mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK;
+	const uint64_t l4_offset = mbuf->l2_len + mbuf->l3_len;
+	uint32_t hdrlens = l4_offset;
+	void *l3_hdr = NULL;
+
+	/* Determine total header length needed */
+	if (l4_ol_flags == RTE_MBUF_F_TX_UDP_CKSUM)
+		hdrlens += sizeof(struct rte_udp_hdr);
+	else if (l4_ol_flags == RTE_MBUF_F_TX_TCP_CKSUM)
+		hdrlens += sizeof(struct rte_tcp_hdr);
+	else if (l4_ol_flags != RTE_MBUF_F_TX_L4_NO_CKSUM)
+		return NULL; /* Unsupported L4 checksum type */
+	else if (!(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM))
+		return mbuf; /* Nothing to do */
+
+	/* Validate we at least have L2+L3 headers before doing any work */
+	if (unlikely(rte_pktmbuf_data_len(mbuf) < l4_offset))
+		return NULL;
+
+	if (copy) {
+		/*
+		 * Copy headers to new segment to handle indirect mbufs.
+		 * This ensures we can safely modify checksums without
+		 * corrupting shared/read-only data.
+		 */
+		struct rte_mbuf *seg = rte_pktmbuf_copy(mbuf, mbuf->pool, 0, hdrlens);
+		if (!seg)
+			return NULL;
+
+		rte_pktmbuf_adj(mbuf, hdrlens);
+		rte_pktmbuf_chain(seg, mbuf);
+		mbuf = seg;
+	} else if (unlikely(!RTE_MBUF_DIRECT(mbuf) || rte_mbuf_refcnt_read(mbuf) > 1))
+		return NULL;
+
+	l3_hdr = rte_pktmbuf_mtod_offset(mbuf, void *, mbuf->l2_len);
+
+	/* IPv4 header checksum */
+	if (mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
+		struct rte_ipv4_hdr *iph = (struct rte_ipv4_hdr *)l3_hdr;
+		iph->hdr_checksum = 0;
+		iph->hdr_checksum = rte_ipv4_cksum(iph);
+	}
+
+	/* L4 checksum (UDP or TCP) - skip if headers not in first segment */
+	if (l4_ol_flags == RTE_MBUF_F_TX_UDP_CKSUM && rte_pktmbuf_data_len(mbuf) >= hdrlens) {
+		struct rte_udp_hdr *udp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *,
+				l4_offset);
+		udp_hdr->dgram_cksum = 0;
+		udp_hdr->dgram_cksum = (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) ?
+			rte_ipv4_udptcp_cksum_mbuf(mbuf, (const struct rte_ipv4_hdr *)l3_hdr,
+					l4_offset) :
+			rte_ipv6_udptcp_cksum_mbuf(mbuf, (const struct rte_ipv6_hdr *)l3_hdr,
+					l4_offset);
+	} else if (l4_ol_flags == RTE_MBUF_F_TX_TCP_CKSUM &&
+			rte_pktmbuf_data_len(mbuf) >= hdrlens) {
+		struct rte_tcp_hdr *tcp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *,
+				l4_offset);
+		tcp_hdr->cksum = 0;
+		tcp_hdr->cksum = (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) ?
+			rte_ipv4_udptcp_cksum_mbuf(mbuf,  (const struct rte_ipv4_hdr *)l3_hdr,
+					l4_offset) :
+			rte_ipv6_udptcp_cksum_mbuf(mbuf, (const struct rte_ipv6_hdr *)l3_hdr,
+					l4_offset);
+	}
+
+	return mbuf;
+}
+
 #ifdef __cplusplus
 }
 #endif
-- 
2.39.5 (Apple Git-154)


  parent reply	other threads:[~2026-02-02  8:15 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-27 18:13 [PATCH v1 0/3] net/af_packet: correctness fixes and improvements scott.k.mitch1
2026-01-27 18:13 ` [PATCH v1 1/3] net/af_packet: fix thread safety and frame calculations scott.k.mitch1
2026-01-27 18:39   ` Stephen Hemminger
2026-01-28  1:35     ` Scott Mitchell
2026-01-27 18:13 ` [PATCH v1 2/3] net/af_packet: RX/TX rte_memcpy, bulk free, prefetch scott.k.mitch1
2026-01-27 18:54   ` Stephen Hemminger
2026-01-28  1:23     ` Scott Mitchell
2026-01-28  9:49       ` Morten Brørup
2026-01-28 15:37         ` Scott Mitchell
2026-01-28 16:57           ` Stephen Hemminger
2026-01-27 18:13 ` [PATCH v1 3/3] net/af_packet: software checksum and tx poll control scott.k.mitch1
2026-01-27 18:57   ` Stephen Hemminger
2026-01-28  7:05     ` Scott Mitchell
2026-01-28 17:36       ` Stephen Hemminger
2026-01-28 18:59         ` Scott Mitchell
2026-01-27 20:45   ` [REVIEW] " Stephen Hemminger
2026-01-28  9:36 ` [PATCH v2 0/4] af_packet correctness, performance, cksum scott.k.mitch1
2026-01-28  9:36   ` [PATCH v2 1/4] net/af_packet: fix thread safety and frame calculations scott.k.mitch1
2026-01-28 16:59     ` Stephen Hemminger
2026-01-28 18:00       ` Scott Mitchell
2026-01-28 18:28         ` Stephen Hemminger
2026-01-28  9:36   ` [PATCH v2 2/4] net/af_packet: RX/TX unlikely, bulk free, prefetch scott.k.mitch1
2026-01-28  9:36   ` [PATCH v2 3/4] net/af_packet: tx poll control scott.k.mitch1
2026-01-28  9:36   ` [PATCH v2 4/4] net/af_packet: software checksum scott.k.mitch1
2026-01-28 18:27     ` Stephen Hemminger
2026-01-28 19:08       ` Scott Mitchell
2026-01-28 19:10   ` [PATCH v3 0/4] af_packet correctness, performance, cksum scott.k.mitch1
2026-01-28 19:10     ` [PATCH v3 1/4] net/af_packet: fix thread safety and frame calculations scott.k.mitch1
2026-01-28 19:10     ` [PATCH v3 2/4] net/af_packet: RX/TX unlikely, bulk free, prefetch scott.k.mitch1
2026-01-29  1:07       ` Stephen Hemminger
2026-02-02  5:29         ` Scott Mitchell
2026-01-28 19:10     ` [PATCH v3 3/4] net/af_packet: tx poll control scott.k.mitch1
2026-01-28 19:10     ` [PATCH v3 4/4] net/af_packet: software checksum scott.k.mitch1
2026-01-28 21:57       ` [REVIEW] " Stephen Hemminger
2026-02-02  7:55         ` Scott Mitchell
2026-02-02 16:58           ` Stephen Hemminger
2026-02-02  8:14     ` [PATCH v4 0/4] af_packet correctness, performance, cksum scott.k.mitch1
2026-02-02  8:14       ` [PATCH v4 1/4] net/af_packet: fix thread safety and frame calculations scott.k.mitch1
2026-02-02  8:14       ` [PATCH v4 2/4] net/af_packet: RX/TX bulk free, unlikely hint scott.k.mitch1
2026-02-02  8:14       ` [PATCH v4 3/4] net/af_packet: tx poll control scott.k.mitch1
2026-02-02  8:14       ` scott.k.mitch1 [this message]
2026-02-02 17:00         ` [PATCH v4 4/4] net/af_packet: add software checksum offload support Stephen Hemminger
2026-02-02 18:47         ` Stephen Hemminger
2026-02-03  6:41           ` Scott Mitchell
2026-02-02 18:53       ` [PATCH v4 0/4] af_packet correctness, performance, cksum Stephen Hemminger
2026-02-03  7:07       ` [PATCH v5 " scott.k.mitch1
2026-02-03  7:07         ` [PATCH v5 1/4] net/af_packet: fix thread safety and frame calculations scott.k.mitch1
2026-02-03  7:07         ` [PATCH v5 2/4] net/af_packet: RX/TX bulk free, unlikely hint scott.k.mitch1
2026-02-03  7:07         ` [PATCH v5 3/4] net/af_packet: tx poll control scott.k.mitch1
2026-02-03  7:07         ` [PATCH v5 4/4] net/af_packet: add software checksum offload support scott.k.mitch1
2026-02-03  8:20           ` Scott Mitchell
2026-02-03 14:12             ` Stephen Hemminger
2026-02-04  2:59               ` Scott Mitchell
2026-02-03 14:13           ` Stephen Hemminger
2026-02-04  1:39             ` Scott Mitchell
2026-02-05 21:27               ` Stephen Hemminger
2026-02-06  1:11         ` [PATCH v6 0/4] af_packet correctness, performance, cksum scott.k.mitch1
2026-02-06  1:11           ` [PATCH v6 1/4] net/af_packet: fix thread safety and frame calculations scott.k.mitch1
2026-02-06  1:11           ` [PATCH v6 2/4] net/af_packet: RX/TX bulk free, unlikely hint scott.k.mitch1
2026-02-06  1:11           ` [PATCH v6 3/4] net/af_packet: tx poll control scott.k.mitch1
2026-02-06  1:11           ` [PATCH v6 4/4] net/af_packet: add software checksum offload support scott.k.mitch1
2026-02-06  1:49           ` [PATCH v6 0/4] af_packet correctness, performance, cksum Stephen Hemminger
2026-02-06  4:45             ` Scott Mitchell
2026-02-06 14:36             ` Morten Brørup
2026-02-06 16:11               ` Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260202081456.4322-5-scott.k.mitch1@gmail.com \
    --to=scott.k.mitch1@gmail.com \
    --cc=dev@dpdk.org \
    --cc=stephen@networkplumber.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox