public inbox for dev@dpdk.org
 help / color / mirror / Atom feed
From: scott.k.mitch1@gmail.com
To: dev@dpdk.org
Cc: stephen@networkplumber.org, Scott Mitchell <scott.k.mitch1@gmail.com>
Subject: [PATCH v3 3/4] net/af_packet: tx poll control
Date: Wed, 28 Jan 2026 11:10:31 -0800	[thread overview]
Message-ID: <20260128191032.78916-4-scott.k.mitch1@gmail.com> (raw)
In-Reply-To: <20260128191032.78916-1-scott.k.mitch1@gmail.com>

From: Scott Mitchell <scott.k.mitch1@gmail.com>

Add the txpollnotrdy devarg (default 1) to control whether poll() is
called when the TX ring is not ready. Setting it to 0 lets users avoid
blocking in the TX path when application threads run in an asynchronous
poll mode, where blocking the thread has negative side effects and
backpressure is applied by other means.

Signed-off-by: Scott Mitchell <scott.k.mitch1@gmail.com>
---
 doc/guides/nics/af_packet.rst             |  6 +++-
 drivers/net/af_packet/rte_eth_af_packet.c | 34 ++++++++++++++++++-----
 2 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/doc/guides/nics/af_packet.rst b/doc/guides/nics/af_packet.rst
index 1505b98ff7..782a962c3f 100644
--- a/doc/guides/nics/af_packet.rst
+++ b/doc/guides/nics/af_packet.rst
@@ -29,6 +29,10 @@ Some of these, in turn, will be used to configure the PACKET_MMAP settings.
 *   ``framesz`` - PACKET_MMAP frame size (optional, default 2048B; Note: multiple
     of 16B);
 *   ``framecnt`` - PACKET_MMAP frame count (optional, default 512).
+*   ``txpollnotrdy`` - Controls TX behavior when there is no space available
+    to write to the kernel. If 1, call poll() and block until space becomes
+    available. If 0, do not call poll(); stop and return from TX instead
+    (optional, default 1).
 
 For details regarding ``fanout_mode`` argument, you can consult the
 `PACKET_FANOUT documentation <https://www.man7.org/linux/man-pages/man7/packet.7.html>`_.
@@ -75,7 +79,7 @@ framecnt=512):
 
 .. code-block:: console
 
-    --vdev=eth_af_packet0,iface=tap0,blocksz=4096,framesz=2048,framecnt=512,qpairs=1,qdisc_bypass=0,fanout_mode=hash
+    --vdev=eth_af_packet0,iface=tap0,blocksz=4096,framesz=2048,framecnt=512,qpairs=1,qdisc_bypass=0,fanout_mode=hash,txpollnotrdy=0
 
 Features and Limitations
 ------------------------
diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index e357ae168b..be8e3260aa 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -18,6 +18,7 @@
 #include <bus_vdev_driver.h>
 
 #include <errno.h>
+#include <stdbool.h>
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
 #include <arpa/inet.h>
@@ -39,9 +40,11 @@
 #define ETH_AF_PACKET_FRAMECOUNT_ARG	"framecnt"
 #define ETH_AF_PACKET_QDISC_BYPASS_ARG	"qdisc_bypass"
 #define ETH_AF_PACKET_FANOUT_MODE_ARG	"fanout_mode"
+#define ETH_AF_PACKET_TX_POLL_NOT_READY_ARG	"txpollnotrdy"
 
 #define DFLT_FRAME_SIZE		(1 << 11)
 #define DFLT_FRAME_COUNT	(1 << 9)
+#define DFLT_TX_POLL_NOT_RDY	true
 
 static const uint16_t ETH_AF_PACKET_FRAME_SIZE_MAX = RTE_IPV4_MAX_PKT_LEN;
 #define ETH_AF_PACKET_FRAME_OVERHEAD (TPACKET2_HDRLEN - sizeof(struct sockaddr_ll))
@@ -78,6 +81,9 @@ struct __rte_cache_aligned pkt_tx_queue {
 	unsigned int framecount;
 	unsigned int framenum;
 
+	bool txpollnotrdy;
+	bool sw_cksum;
+
 	volatile unsigned long tx_pkts;
 	volatile unsigned long err_pkts;
 	volatile unsigned long tx_bytes;
@@ -106,6 +112,7 @@ static const char *valid_arguments[] = {
 	ETH_AF_PACKET_FRAMECOUNT_ARG,
 	ETH_AF_PACKET_QDISC_BYPASS_ARG,
 	ETH_AF_PACKET_FANOUT_MODE_ARG,
+	ETH_AF_PACKET_TX_POLL_NOT_READY_ARG,
 	NULL
 };
 
@@ -265,10 +272,12 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	uint32_t num_tx_bytes = 0;
 	uint16_t i;
 
-	memset(&pfd, 0, sizeof(pfd));
-	pfd.fd = pkt_q->sockfd;
-	pfd.events = POLLOUT;
-	pfd.revents = 0;
+	if (pkt_q->txpollnotrdy) {
+		memset(&pfd, 0, sizeof(pfd));
+		pfd.fd = pkt_q->sockfd;
+		pfd.events = POLLOUT;
+		pfd.revents = 0;
+	}
 
 	framecount = pkt_q->framecount;
 	framenum = pkt_q->framenum;
@@ -308,8 +317,9 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		 * This results in poll() returning POLLOUT.
 		 */
 		if (unlikely(!tx_ring_status_available(tpacket_read_status(&ppd->tp_status)) &&
-			(poll(&pfd, 1, -1) < 0 || (pfd.revents & POLLERR) != 0 ||
-			 !tx_ring_status_available(tpacket_read_status(&ppd->tp_status))))) {
+			(!pkt_q->txpollnotrdy || poll(&pfd, 1, -1) < 0 ||
+			(pfd.revents & POLLERR) != 0 ||
+			!tx_ring_status_available(tpacket_read_status(&ppd->tp_status))))) {
 			/* Ring is full, stop here. Don't process bufs[i]. */
 			break;
 		}
@@ -820,6 +830,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
                        unsigned int framecnt,
 		       unsigned int qdisc_bypass,
 		       const char *fanout_mode,
+		       bool txpollnotrdy,
                        struct pmd_internals **internals,
                        struct rte_eth_dev **eth_dev,
                        struct rte_kvargs *kvlist)
@@ -1038,6 +1049,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
 			tx_queue->rd[i].iov_len = req->tp_frame_size;
 		}
 		tx_queue->sockfd = qsockfd;
+		tx_queue->txpollnotrdy = txpollnotrdy;
 
 		rc = bind(qsockfd, (const struct sockaddr*)&sockaddr, sizeof(sockaddr));
 		if (rc == -1) {
@@ -1126,6 +1138,7 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
 	unsigned int qpairs = 1;
 	unsigned int qdisc_bypass = 1;
 	const char *fanout_mode = NULL;
+	bool txpollnotrdy = DFLT_TX_POLL_NOT_RDY;
 
 	/* do some parameter checking */
 	if (*sockfd < 0)
@@ -1193,6 +1206,10 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
 			fanout_mode = pair->value;
 			continue;
 		}
+		if (strstr(pair->key, ETH_AF_PACKET_TX_POLL_NOT_READY_ARG) != NULL) {
+			txpollnotrdy = atoi(pair->value) != 0;
+			continue;
+		}
 	}
 
 	if (framesize > blocksize) {
@@ -1261,12 +1278,14 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
 		PMD_LOG(DEBUG, "%s:\tfanout mode %s", name, fanout_mode);
 	else
 		PMD_LOG(DEBUG, "%s:\tfanout mode %s", name, "default PACKET_FANOUT_HASH");
+	PMD_LOG(INFO, "%s:\ttxpollnotrdy %d", name, txpollnotrdy ? 1 : 0);
 
 	if (rte_pmd_init_internals(dev, *sockfd, qpairs,
 				   blocksize, blockcount,
 				   framesize, framecount,
 				   qdisc_bypass,
 				   fanout_mode,
+				   txpollnotrdy,
 				   &internals, &eth_dev,
 				   kvlist) < 0)
 		return -1;
@@ -1364,4 +1383,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_packet,
 	"framesz=<int> "
 	"framecnt=<int> "
 	"qdisc_bypass=<0|1> "
-	"fanout_mode=<hash|lb|cpu|rollover|rnd|qm>");
+	"fanout_mode=<hash|lb|cpu|rollover|rnd|qm> "
+	"txpollnotrdy=<0|1>");
-- 
2.39.5 (Apple Git-154)


  parent reply	other threads:[~2026-01-28 19:12 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-27 18:13 [PATCH v1 0/3] net/af_packet: correctness fixes and improvements scott.k.mitch1
2026-01-27 18:13 ` [PATCH v1 1/3] net/af_packet: fix thread safety and frame calculations scott.k.mitch1
2026-01-27 18:39   ` Stephen Hemminger
2026-01-28  1:35     ` Scott Mitchell
2026-01-27 18:13 ` [PATCH v1 2/3] net/af_packet: RX/TX rte_memcpy, bulk free, prefetch scott.k.mitch1
2026-01-27 18:54   ` Stephen Hemminger
2026-01-28  1:23     ` Scott Mitchell
2026-01-28  9:49       ` Morten Brørup
2026-01-28 15:37         ` Scott Mitchell
2026-01-28 16:57           ` Stephen Hemminger
2026-01-27 18:13 ` [PATCH v1 3/3] net/af_packet: software checksum and tx poll control scott.k.mitch1
2026-01-27 18:57   ` Stephen Hemminger
2026-01-28  7:05     ` Scott Mitchell
2026-01-28 17:36       ` Stephen Hemminger
2026-01-28 18:59         ` Scott Mitchell
2026-01-27 20:45   ` [REVIEW] " Stephen Hemminger
2026-01-28  9:36 ` [PATCH v2 0/4] af_packet correctness, performance, cksum scott.k.mitch1
2026-01-28  9:36   ` [PATCH v2 1/4] net/af_packet: fix thread safety and frame calculations scott.k.mitch1
2026-01-28 16:59     ` Stephen Hemminger
2026-01-28 18:00       ` Scott Mitchell
2026-01-28 18:28         ` Stephen Hemminger
2026-01-28  9:36   ` [PATCH v2 2/4] net/af_packet: RX/TX unlikely, bulk free, prefetch scott.k.mitch1
2026-01-28  9:36   ` [PATCH v2 3/4] net/af_packet: tx poll control scott.k.mitch1
2026-01-28  9:36   ` [PATCH v2 4/4] net/af_packet: software checksum scott.k.mitch1
2026-01-28 18:27     ` Stephen Hemminger
2026-01-28 19:08       ` Scott Mitchell
2026-01-28 19:10   ` [PATCH v3 0/4] af_packet correctness, performance, cksum scott.k.mitch1
2026-01-28 19:10     ` [PATCH v3 1/4] net/af_packet: fix thread safety and frame calculations scott.k.mitch1
2026-01-28 19:10     ` [PATCH v3 2/4] net/af_packet: RX/TX unlikely, bulk free, prefetch scott.k.mitch1
2026-01-29  1:07       ` Stephen Hemminger
2026-02-02  5:29         ` Scott Mitchell
2026-01-28 19:10     ` scott.k.mitch1 [this message]
2026-01-28 19:10     ` [PATCH v3 4/4] net/af_packet: software checksum scott.k.mitch1
2026-01-28 21:57       ` [REVIEW] " Stephen Hemminger
2026-02-02  7:55         ` Scott Mitchell
2026-02-02 16:58           ` Stephen Hemminger
2026-02-02  8:14     ` [PATCH v4 0/4] af_packet correctness, performance, cksum scott.k.mitch1
2026-02-02  8:14       ` [PATCH v4 1/4] net/af_packet: fix thread safety and frame calculations scott.k.mitch1
2026-02-02  8:14       ` [PATCH v4 2/4] net/af_packet: RX/TX bulk free, unlikely hint scott.k.mitch1
2026-02-02  8:14       ` [PATCH v4 3/4] net/af_packet: tx poll control scott.k.mitch1
2026-02-02  8:14       ` [PATCH v4 4/4] net/af_packet: add software checksum offload support scott.k.mitch1
2026-02-02 17:00         ` Stephen Hemminger
2026-02-02 18:47         ` Stephen Hemminger
2026-02-03  6:41           ` Scott Mitchell
2026-02-02 18:53       ` [PATCH v4 0/4] af_packet correctness, performance, cksum Stephen Hemminger
2026-02-03  7:07       ` [PATCH v5 " scott.k.mitch1
2026-02-03  7:07         ` [PATCH v5 1/4] net/af_packet: fix thread safety and frame calculations scott.k.mitch1
2026-02-03  7:07         ` [PATCH v5 2/4] net/af_packet: RX/TX bulk free, unlikely hint scott.k.mitch1
2026-02-03  7:07         ` [PATCH v5 3/4] net/af_packet: tx poll control scott.k.mitch1
2026-02-03  7:07         ` [PATCH v5 4/4] net/af_packet: add software checksum offload support scott.k.mitch1
2026-02-03  8:20           ` Scott Mitchell
2026-02-03 14:12             ` Stephen Hemminger
2026-02-04  2:59               ` Scott Mitchell
2026-02-03 14:13           ` Stephen Hemminger
2026-02-04  1:39             ` Scott Mitchell
2026-02-05 21:27               ` Stephen Hemminger
2026-02-06  1:11         ` [PATCH v6 0/4] af_packet correctness, performance, cksum scott.k.mitch1
2026-02-06  1:11           ` [PATCH v6 1/4] net/af_packet: fix thread safety and frame calculations scott.k.mitch1
2026-02-06  1:11           ` [PATCH v6 2/4] net/af_packet: RX/TX bulk free, unlikely hint scott.k.mitch1
2026-02-06  1:11           ` [PATCH v6 3/4] net/af_packet: tx poll control scott.k.mitch1
2026-02-06  1:11           ` [PATCH v6 4/4] net/af_packet: add software checksum offload support scott.k.mitch1
2026-02-06  1:49           ` [PATCH v6 0/4] af_packet correctness, performance, cksum Stephen Hemminger
2026-02-06  4:45             ` Scott Mitchell
2026-02-06 14:36             ` Morten Brørup
2026-02-06 16:11               ` Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260128191032.78916-4-scott.k.mitch1@gmail.com \
    --to=scott.k.mitch1@gmail.com \
    --cc=dev@dpdk.org \
    --cc=stephen@networkplumber.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.