public inbox for dev@dpdk.org
 help / color / mirror / Atom feed
From: Stephen Hemminger <stephen@networkplumber.org>
To: dev@dpdk.org
Cc: Stephen Hemminger <stephen@networkplumber.org>,
	Anatoly Burakov <anatoly.burakov@intel.com>
Subject: [PATCH v5 02/10] net/rtap: add TAP device creation and queue management
Date: Mon,  9 Feb 2026 10:39:01 -0800	[thread overview]
Message-ID: <20260209184045.132774-3-stephen@networkplumber.org> (raw)
In-Reply-To: <20260209184045.132774-1-stephen@networkplumber.org>

Add TAP device creation using the Linux TUN/TAP interface with
IFF_MULTI_QUEUE, IFF_NO_PI, and IFF_VNET_HDR flags. Enable NAPI
mode when the kernel supports it.

The driver maintains a keep-alive file descriptor to the TAP device
and opens additional per-queue file descriptors for data path I/O.
This mirrors the multi-queue TAP architecture where each queue pair
(rx + tx) shares a single TAP fd.

Add the rtap_create() function that:
  - Opens the TAP device with configurable interface name
  - Configures the virtio-net header size
  - Reads the kernel-assigned MAC address
  - Supports the 'persist' option to keep the interface after exit
  - Detaches the keep-alive queue from data traffic

Add rtap_queue_open() and rtap_queue_close() for per-queue fd
management used during queue setup and teardown.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/rtap/rtap.h        |   4 +
 drivers/net/rtap/rtap_ethdev.c | 216 ++++++++++++++++++++++++++++++++-
 2 files changed, 219 insertions(+), 1 deletion(-)

diff --git a/drivers/net/rtap/rtap.h b/drivers/net/rtap/rtap.h
index 507ab000f3..39a3188a7b 100644
--- a/drivers/net/rtap/rtap.h
+++ b/drivers/net/rtap/rtap.h
@@ -66,4 +66,8 @@ struct rtap_pmd {
 	struct rte_ether_addr eth_addr; /* address assigned by kernel */
 };
 
+/* rtap_ethdev.c */
+int rtap_queue_open(struct rte_eth_dev *dev, uint16_t queue_id);
+void rtap_queue_close(struct rte_eth_dev *dev, uint16_t queue_id);
+
 #endif /* _RTAP_H_ */
diff --git a/drivers/net/rtap/rtap_ethdev.c b/drivers/net/rtap/rtap_ethdev.c
index ee5b5bad1b..4e7847ff8d 100644
--- a/drivers/net/rtap/rtap_ethdev.c
+++ b/drivers/net/rtap/rtap_ethdev.c
@@ -39,13 +39,140 @@ static const char * const valid_arguments[] = {
 	NULL
 };
 
+/* Opens a TAP fd for 'name' (device creation or an extra queue); final name returned in ifr */
+static int
+rtap_tap_open(const char *name, struct ifreq *ifr, uint8_t persist)
+{
+	static const char tun_dev[] = "/dev/net/tun";
+	int tap_fd;
+
+	tap_fd = open(tun_dev, O_RDWR | O_CLOEXEC | O_NONBLOCK);
+	if (tap_fd < 0) {
+		PMD_LOG_ERRNO(ERR, "Open %s failed", tun_dev);
+		return -1;
+	}
+
+	int features = 0;
+	if (ioctl(tap_fd, TUNGETFEATURES, &features) < 0) {
+		PMD_LOG_ERRNO(ERR, "ioctl(TUNGETFEATURES): %s", tun_dev);
+		goto error;
+	}
+
+	int flags = IFF_TAP | IFF_MULTI_QUEUE | IFF_NO_PI | IFF_VNET_HDR;
+	if ((features & flags) != flags) {
+		PMD_LOG(ERR, "TUN features %#x missing support for %#x",
+			features, flags & ~features);	/* report only the absent bits */
+		goto error;
+	}
+
+#ifdef IFF_NAPI
+	/* If kernel supports using NAPI enable it */
+	if (features & IFF_NAPI)
+		flags |= IFF_NAPI;
+#endif
+	/*
+	 * Sets the device name and packet format.
+	 * Do not want the protocol information (PI)
+	 */
+	strlcpy(ifr->ifr_name, name, IFNAMSIZ);
+	ifr->ifr_flags = flags;
+	if (ioctl(tap_fd, TUNSETIFF, ifr) < 0) {
+		PMD_LOG_ERRNO(ERR, "ioctl(TUNSETIFF) %s", ifr->ifr_name);
+		goto error;
+	}
+
+	/* (Optional) keep the device after application exit */
+	if (persist && ioctl(tap_fd, TUNSETPERSIST, 1) < 0) {
+		PMD_LOG_ERRNO(ERR, "ioctl(TUNSETPERSIST) %s", ifr->ifr_name);
+		goto error;
+	}
+
+	int hdr_size = sizeof(struct virtio_net_hdr);	/* header length for IFF_VNET_HDR */
+	if (ioctl(tap_fd, TUNSETVNETHDRSZ, &hdr_size) < 0) {
+		PMD_LOG_ERRNO(ERR, "ioctl(TUNSETVNETHDRSZ) %s", ifr->ifr_name);
+		goto error;
+	}
+
+	return tap_fd;
+error:
+	close(tap_fd);
+	return -1;
+}
+
+/* Report the link as up and flag every rx/tx queue pair as started; always returns 0 */
+static int
+rtap_dev_start(struct rte_eth_dev *dev)
+{
+	dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
+	for (uint16_t q = 0; q < dev->data->nb_rx_queues; q++) { /* one index covers rx and tx */
+		dev->data->rx_queue_state[q] = RTE_ETH_QUEUE_STATE_STARTED;
+		dev->data->tx_queue_state[q] = RTE_ETH_QUEUE_STATE_STARTED;
+	}
+	return 0;
+}
+
+/* Report the link as down, flag all queues stopped and, in the primary, close queue fds */
+static int
+rtap_dev_stop(struct rte_eth_dev *dev)
+{
+	struct rte_eth_dev_data *data = dev->data;
+	int *queue_fds = dev->process_private;
+
+	data->dev_link.link_status = RTE_ETH_LINK_DOWN;
+	for (uint16_t q = 0; q < data->nb_rx_queues; q++) {
+		data->rx_queue_state[q] = RTE_ETH_QUEUE_STATE_STOPPED;
+		data->tx_queue_state[q] = RTE_ETH_QUEUE_STATE_STOPPED;
+	}
+	/* Only the primary process closes the per-queue fds on stop */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
+	for (uint16_t q = 0; q < RTE_MAX_QUEUES_PER_PORT; q++) {
+		if (queue_fds[q] != -1) {
+			close(queue_fds[q]);
+			queue_fds[q] = -1;	/* mark the slot unused */
+		}
+	}
+	return 0;
+}
+
+/* Check rx/tx queue pairing and issue TUNSETOFFLOAD with 0; returns 0 or a negative errno */
+static int
+rtap_dev_configure(struct rte_eth_dev *dev)
+{
+	struct rtap_pmd *pmd = dev->data->dev_private;
+
+	if (dev->data->nb_rx_queues != dev->data->nb_tx_queues)
+		return -EINVAL;	/* rx/tx must be paired */
+
+	if (ioctl(pmd->keep_fd, TUNSETOFFLOAD, 0) != 0) {
+		int err = errno;	/* capture before logging can clobber it */
+		PMD_LOG(ERR, "ioctl(TUNSETOFFLOAD) failed: %s", strerror(err));
+		return -err;	/* negative errno, consistent with -EINVAL above */
+	}
+	return 0;
+}
+
 static int
 rtap_dev_close(struct rte_eth_dev *dev)
 {
 	struct rtap_pmd *pmd = dev->data->dev_private;
+	int *fds = dev->process_private;
 
 	PMD_LOG(INFO, "Closing %s", pmd->ifname);
 
+	/* Release all io_uring queues (calls rx/tx_queue_release for each) */
+	rte_eth_dev_internal_reset(dev);
+
+	/* Close any remaining queue fds (each process owns its own set) */
+	for (uint16_t i = 0; i < RTE_MAX_QUEUES_PER_PORT; i++) {
+		if (fds[i] == -1)
+			continue;
+		PMD_LOG(DEBUG, "Closed queue %u fd %d", i, fds[i]);
+		close(fds[i]);
+		fds[i] = -1;
+	}
+
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
 		/* mac_addrs must not be freed alone because part of dev_private */
 		dev->data->mac_addrs = NULL;
@@ -63,10 +190,96 @@ rtap_dev_close(struct rte_eth_dev *dev)
 	return 0;
 }
 
+/* Open one more fd on the TAP interface and record it as the fd for queue_id */
+int
+rtap_queue_open(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+	struct rtap_pmd *pmd = dev->data->dev_private;
+	int *slot = (int *)dev->process_private + queue_id;
+	struct ifreq ifr = { 0 };
+
+	if (*slot != -1) {
+		PMD_LOG(DEBUG, "queue %u already has fd %d", queue_id, *slot);
+		return 0;	/* nothing to do, queue already has an fd */
+	}
+
+	int fd = rtap_tap_open(pmd->ifname, &ifr, 0);	/* persist=0 for queue fds */
+	if (fd < 0) {
+		PMD_LOG(ERR, "tap_open failed");
+		return -1;
+	}
+
+	PMD_LOG(DEBUG, "Opened %d for queue %u", fd, queue_id);
+	*slot = fd;
+	return 0;
+}
+
+/* Close the fd recorded for queue_id, if any, and mark the slot unused */
+void
+rtap_queue_close(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+	int *slot = (int *)dev->process_private + queue_id;
+
+	if (*slot != -1) {
+		PMD_LOG(DEBUG, "Closed queue %u fd %d", queue_id, *slot);
+		close(*slot);
+		*slot = -1;
+	}
+}
+
 static const struct eth_dev_ops rtap_ops = {
+	.dev_start		= rtap_dev_start,	/* link up, queues marked started */
+	.dev_stop		= rtap_dev_stop,	/* link down, queues marked stopped */
+	.dev_configure		= rtap_dev_configure,	/* enforces rx/tx queue pairing */
 	.dev_close		= rtap_dev_close,
 };
 
+/* Create the TAP interface and its keep-alive fd; returns 0 on success, -1 on failure */
+static int
+rtap_create(struct rte_eth_dev *dev, const char *tap_name, uint8_t persist)
+{
+	struct rte_eth_dev_data *data = dev->data;
+	struct rtap_pmd *pmd = data->dev_private;
+
+	pmd->keep_fd = -1;
+	dev->dev_ops = &rtap_ops;
+
+	/* Get the initial fd used to keep the tap device around */
+	struct ifreq ifr = { 0 };
+	pmd->keep_fd = rtap_tap_open(tap_name, &ifr, persist);
+	if (pmd->keep_fd < 0)
+		goto error;
+
+	PMD_LOG(DEBUG, "Created %s keep_fd %d", ifr.ifr_name, pmd->keep_fd);
+
+	/* Use name returned by kernel, i.e. if tap_name is rtap%d this will be rtap0 */
+	strlcpy(pmd->ifname, ifr.ifr_name, IFNAMSIZ);
+
+	/* Read the MAC address assigned by the kernel */
+	if (ioctl(pmd->keep_fd, SIOCGIFHWADDR, &ifr) < 0) {
+		PMD_LOG_ERRNO(ERR, "Unable to get MAC address for %s", ifr.ifr_name);
+		goto error;
+	}
+	memcpy(&pmd->eth_addr, &ifr.ifr_hwaddr.sa_data, RTE_ETHER_ADDR_LEN);
+
+	/* Detach this instance, not used for traffic */
+	ifr.ifr_flags = IFF_DETACH_QUEUE;
+	if (ioctl(pmd->keep_fd, TUNSETQUEUE, &ifr) < 0) {
+		PMD_LOG_ERRNO(ERR, "Unable to detach keep-alive queue for %s", ifr.ifr_name);
+		goto error;
+	}
+
+	/* Publish the MAC last: a failed create must not leave a pointer into dev_private */
+	data->mac_addrs = &pmd->eth_addr;
+	PMD_LOG(DEBUG, "%s setup", ifr.ifr_name);
+	return 0;
+error:
+	if (pmd->keep_fd != -1)
+		close(pmd->keep_fd);
+	pmd->keep_fd = -1;	/* no stale fd left behind for later teardown to close again */
+	return -1;
+}
+
 static int
 rtap_parse_iface(const char *key __rte_unused, const char *value, void *extra_args)
 {
@@ -129,7 +342,8 @@ rtap_probe(struct rte_vdev_device *vdev)
 	eth_dev->process_private = fds;
 	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
 
-	RTE_SET_USED(persist); /* used in later patches */
+	if (rtap_create(eth_dev, tap_name, persist) < 0)
+		goto error;
 
 	rte_eth_dev_probing_finish(eth_dev);
 	rte_kvargs_free(kvlist);
-- 
2.51.0


  parent reply	other threads:[~2026-02-09 18:41 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-10 21:23 [RFC 0/8] ioring: network driver Stephen Hemminger
2024-12-10 21:23 ` [RFC 1/8] net/ioring: introduce new driver Stephen Hemminger
2024-12-10 21:23 ` [RFC 2/8] net/ioring: implement link state Stephen Hemminger
2024-12-10 21:23 ` [RFC 3/8] net/ioring: implement control functions Stephen Hemminger
2024-12-10 21:23 ` [RFC 4/8] net/ioring: implement management functions Stephen Hemminger
2024-12-10 21:23 ` [RFC 5/8] net/ioring: implement primary secondary fd passing Stephen Hemminger
2024-12-10 21:23 ` [RFC 6/8] net/ioring: implement receive and transmit Stephen Hemminger
2024-12-10 21:23 ` [RFC 7/8] net/ioring: add VLAN support Stephen Hemminger
2024-12-10 21:23 ` [RFC 8/8] net/ioring: implement statistics Stephen Hemminger
2024-12-11 11:34 ` [RFC 0/8] ioring: network driver Konstantin Ananyev
2024-12-11 15:03   ` Stephen Hemminger
2024-12-12 19:06     ` Konstantin Ananyev
2024-12-19 15:40       ` Morten Brørup
2024-12-20 14:34         ` Konstantin Ananyev
2024-12-20 16:19           ` Stephen Hemminger
2024-12-11 16:28 ` [PATCH v2 " Stephen Hemminger
2024-12-11 16:28   ` [PATCH v2 1/8] net/ioring: introduce new driver Stephen Hemminger
2024-12-28 16:39     ` Morten Brørup
2024-12-11 16:28   ` [PATCH v2 2/8] net/ioring: implement link state Stephen Hemminger
2024-12-11 16:28   ` [PATCH v2 3/8] net/ioring: implement control functions Stephen Hemminger
2024-12-11 16:28   ` [PATCH v2 4/8] net/ioring: implement management functions Stephen Hemminger
2024-12-11 16:28   ` [PATCH v2 5/8] net/ioring: implement primary secondary fd passing Stephen Hemminger
2024-12-11 16:28   ` [PATCH v2 6/8] net/ioring: implement receive and transmit Stephen Hemminger
2024-12-11 16:28   ` [PATCH v2 7/8] net/ioring: add VLAN support Stephen Hemminger
2024-12-11 16:28   ` [PATCH v2 8/8] net/ioring: implement statistics Stephen Hemminger
2025-03-11 23:51 ` [PATCH v3 0/9] ioring PMD device Stephen Hemminger
2025-03-11 23:51   ` [PATCH v3 1/9] net/ioring: introduce new driver Stephen Hemminger
2025-03-11 23:51   ` [PATCH v3 2/9] net/ioring: implement link state Stephen Hemminger
2025-03-11 23:51   ` [PATCH v3 3/9] net/ioring: implement control functions Stephen Hemminger
2025-03-11 23:51   ` [PATCH v3 4/9] net/ioring: implement management functions Stephen Hemminger
2025-03-11 23:51   ` [PATCH v3 5/9] net/ioring: implement secondary process support Stephen Hemminger
2025-03-11 23:51   ` [PATCH v3 6/9] net/ioring: implement receive and transmit Stephen Hemminger
2025-03-11 23:51   ` [PATCH v3 7/9] net/ioring: add VLAN support Stephen Hemminger
2025-03-11 23:51   ` [PATCH v3 8/9] net/ioring: implement statistics Stephen Hemminger
2025-03-11 23:51   ` [PATCH v3 9/9] net/ioring: support multi-segment Rx and Tx Stephen Hemminger
2025-03-13 21:50 ` [PATCH v4 00/10] new ioring PMD Stephen Hemminger
2025-03-13 21:50   ` [PATCH v4 01/10] net/ioring: introduce new driver Stephen Hemminger
2025-03-13 21:50   ` [PATCH v4 02/10] net/ioring: implement link state Stephen Hemminger
2025-03-13 21:50   ` [PATCH v4 03/10] net/ioring: implement control functions Stephen Hemminger
2025-03-13 21:50   ` [PATCH v4 04/10] net/ioring: implement management functions Stephen Hemminger
2025-03-13 21:50   ` [PATCH v4 05/10] net/ioring: implement secondary process support Stephen Hemminger
2025-03-13 21:50   ` [PATCH v4 06/10] net/ioring: implement receive and transmit Stephen Hemminger
2025-03-13 21:50   ` [PATCH v4 07/10] net/ioring: implement statistics Stephen Hemminger
2025-03-13 21:50   ` [PATCH v4 08/10] net/ioring: support multi-segment Rx and Tx Stephen Hemminger
2025-03-13 21:51   ` [PATCH v4 09/10] net/ioring: support Tx checksum and segment offload Stephen Hemminger
2025-03-13 21:51   ` [PATCH v4 10/10] net/ioring: add support for Rx offload Stephen Hemminger
2026-02-09 18:38 ` [PATCH v5 00/10] net/rtap: add io_uring based TAP driver Stephen Hemminger
2026-02-09 18:39   ` [PATCH v5 01/10] net/rtap: add driver skeleton and documentation Stephen Hemminger
2026-02-09 18:39   ` Stephen Hemminger [this message]
2026-02-09 18:39   ` [PATCH v5 03/10] net/rtap: add Rx/Tx with scatter/gather support Stephen Hemminger
2026-02-09 18:39   ` [PATCH v5 04/10] net/rtap: add statistics and device info Stephen Hemminger
2026-02-09 18:39   ` [PATCH v5 05/10] net/rtap: add link and device management operations Stephen Hemminger
2026-02-09 18:39   ` [PATCH v5 06/10] net/rtap: add checksum and TSO offload support Stephen Hemminger
2026-02-09 18:39   ` [PATCH v5 07/10] net/rtap: add link state change interrupt Stephen Hemminger
2026-02-09 18:39   ` [PATCH v5 08/10] net/rtap: add multi-process support Stephen Hemminger
2026-02-09 18:39   ` [PATCH v5 09/10] net/rtap: add Rx interrupt support Stephen Hemminger
2026-02-09 18:39   ` [PATCH v5 10/10] test: add unit tests for rtap PMD Stephen Hemminger
2026-02-10  9:18   ` [PATCH v5 00/10] net/rtap: add io_uring based TAP driver Morten Brørup
2026-02-14 23:44 ` [PATCH v6 00/11] " Stephen Hemminger
2026-02-14 23:44   ` [PATCH v6 01/11] net/rtap: add driver skeleton and documentation Stephen Hemminger
2026-02-14 23:44   ` [PATCH v6 02/11] net/rtap: add TAP device creation and queue management Stephen Hemminger
2026-02-14 23:44   ` [PATCH v6 03/11] net/rtap: add Rx/Tx with scatter/gather support Stephen Hemminger
2026-02-14 23:44   ` [PATCH v6 04/11] net/rtap: add statistics and device info Stephen Hemminger
2026-02-14 23:44   ` [PATCH v6 05/11] net/rtap: add link and device management operations Stephen Hemminger
2026-02-14 23:44   ` [PATCH v6 06/11] net/rtap: add checksum and TSO offload support Stephen Hemminger
2026-02-14 23:44   ` [PATCH v6 07/11] net/rtap: add multi-process support Stephen Hemminger
2026-02-14 23:44   ` [PATCH v6 08/11] net/rtap: add link state change interrupt Stephen Hemminger
2026-02-14 23:44   ` [PATCH v6 09/11] net/rtap: add Rx interrupt support Stephen Hemminger
2026-02-14 23:44   ` [PATCH v6 10/11] net/rtap: add extended statistics support Stephen Hemminger
2026-02-14 23:44   ` [PATCH v6 11/11] test: add unit tests for rtap PMD Stephen Hemminger
2026-02-15  8:58   ` [PATCH v6 00/11] net/rtap: add io_uring based TAP driver Konstantin Ananyev
2026-02-15 17:08     ` Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260209184045.132774-3-stephen@networkplumber.org \
    --to=stephen@networkplumber.org \
    --cc=anatoly.burakov@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox