All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Borkmann <daniel@iogearbox.net>
To: netdev@vger.kernel.org
Cc: bpf@vger.kernel.org, kuba@kernel.org, davem@davemloft.net,
	razor@blackwall.org, pabeni@redhat.com, willemb@google.com,
	sdf@fomichev.me, john.fastabend@gmail.com, martin.lau@kernel.org,
	jordan@jrife.io, maciej.fijalkowski@intel.com,
	magnus.karlsson@intel.com, dw@davidwei.uk, toke@redhat.com,
	yangzhenze@bytedance.com, wangdongdong.6@bytedance.com
Subject: [PATCH net-next v11 06/14] net: Proxy netif_mp_{open,close}_rxq for leased queues
Date: Fri,  3 Apr 2026 01:10:23 +0200	[thread overview]
Message-ID: <20260402231031.447597-7-daniel@iogearbox.net> (raw)
In-Reply-To: <20260402231031.447597-1-daniel@iogearbox.net>

From: David Wei <dw@davidwei.uk>

When a process in a container wants to set up a memory provider, it will
use the virtual netdev and a leased rxq, and call netif_mp_{open,close}_rxq
to try to restart the queue. At this point, proxy the queue restart onto
the real rxq in the physical netdev.

For memory providers (io_uring zero-copy rx and devmem), this proxying
causes the real rxq in the physical netdev to be filled from a memory
provider that has DMA-mapped memory from a process within a container.

Signed-off-by: David Wei <dw@davidwei.uk>
Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
---
 net/core/dev.c             |   4 +-
 net/core/dev.h             |   7 +++
 net/core/netdev_rx_queue.c | 104 +++++++++++++++++++++++++++++++------
 3 files changed, 95 insertions(+), 20 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index f0a83a09477e..f8cc8667dff4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -12339,10 +12339,8 @@ static void dev_memory_provider_uninstall(struct net_device *dev)
 
 	for (i = 0; i < dev->real_num_rx_queues; i++) {
 		struct netdev_rx_queue *rxq = &dev->_rx[i];
-		struct pp_memory_provider_params *p = &rxq->mp_params;
 
-		if (p->mp_ops && p->mp_ops->uninstall)
-			p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq);
+		__netif_mp_uninstall_rxq(rxq, &rxq->mp_params);
 	}
 }
 
diff --git a/net/core/dev.h b/net/core/dev.h
index 6516ce2b5517..95edb2d4eff8 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -12,6 +12,7 @@ struct net;
 struct netlink_ext_ack;
 struct netdev_queue_config;
 struct cpumask;
+struct pp_memory_provider_params;
 
 /* Random bits of netdevice that don't need to be exposed */
 #define FLOW_LIMIT_HISTORY	(1 << 7)  /* must be ^2 and !overflow buckets */
@@ -101,6 +102,12 @@ int netdev_queue_config_validate(struct net_device *dev, int rxq_idx,
 bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx);
 bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx);
 
+void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq,
+			      const struct pp_memory_provider_params *p);
+
+void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq,
+			       struct netdev_rx_queue *virt_rxq);
+
 /* netdev management, shared between various uAPI entry points */
 struct netdev_name_node {
 	struct hlist_node hlist;
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index 06ac3bd5507f..1d6e7e47bf0a 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -28,6 +28,8 @@ void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst,
 	netdev_assert_locked(rxq_dst->dev);
 	netdev_assert_locked(rxq_src->dev);
 
+	netif_rxq_cleanup_unlease(rxq_src, rxq_dst);
+
 	WRITE_ONCE(rxq_src->lease, NULL);
 	WRITE_ONCE(rxq_dst->lease, NULL);
 
@@ -200,24 +202,15 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
 }
 EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL");
 
-int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
-		      const struct pp_memory_provider_params *p,
-		      struct netlink_ext_ack *extack)
+static int __netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
+			       const struct pp_memory_provider_params *p,
+			       struct netlink_ext_ack *extack)
 {
 	const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops;
 	struct netdev_queue_config qcfg[2];
 	struct netdev_rx_queue *rxq;
 	int ret;
 
-	if (!netdev_need_ops_lock(dev))
-		return -EOPNOTSUPP;
-
-	if (rxq_idx >= dev->real_num_rx_queues) {
-		NL_SET_ERR_MSG(extack, "rx queue index out of range");
-		return -ERANGE;
-	}
-	rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
-
 	if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
 		NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
 		return -EINVAL;
@@ -264,16 +257,48 @@ int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
 	return ret;
 }
 
-void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
-			const struct pp_memory_provider_params *old_p)
+int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
+		      const struct pp_memory_provider_params *p,
+		      struct netlink_ext_ack *extack)
+{
+	struct net_device *orig_dev = dev;
+	int ret;
+
+	if (!netdev_need_ops_lock(dev))
+		return -EOPNOTSUPP;
+
+	if (rxq_idx >= dev->real_num_rx_queues) {
+		NL_SET_ERR_MSG(extack, "rx queue index out of range");
+		return -ERANGE;
+	}
+	rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
+
+	if (!netif_rxq_is_leased(dev, rxq_idx))
+		return __netif_mp_open_rxq(dev, rxq_idx, p, extack);
+
+	if (!netif_get_rx_queue_lease_locked(&dev, &rxq_idx)) {
+		NL_SET_ERR_MSG(extack, "rx queue leased to a virtual netdev");
+		return -EBUSY;
+	}
+	if (!dev->dev.parent) {
+		NL_SET_ERR_MSG(extack, "rx queue belongs to a virtual netdev");
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	ret = __netif_mp_open_rxq(dev, rxq_idx, p, extack);
+out:
+	netif_put_rx_queue_lease_locked(orig_dev, dev);
+	return ret;
+}
+
+static void __netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
+				 const struct pp_memory_provider_params *old_p)
 {
 	struct netdev_queue_config qcfg[2];
 	struct netdev_rx_queue *rxq;
 	int err;
 
-	if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
-		return;
-
 	rxq = __netif_get_rx_queue(dev, ifq_idx);
 
 	/* Callers holding a netdev ref may get here after we already
@@ -294,3 +319,48 @@ void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
 	err = netdev_rx_queue_reconfig(dev, ifq_idx, &qcfg[0], &qcfg[1]);
 	WARN_ON(err && err != -ENETDOWN);
 }
+
+void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
+			const struct pp_memory_provider_params *old_p)
+{
+	struct net_device *orig_dev = dev;
+
+	if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
+		return;
+	if (!netif_rxq_is_leased(dev, ifq_idx))
+		return __netif_mp_close_rxq(dev, ifq_idx, old_p);
+
+	if (WARN_ON_ONCE(!netif_get_rx_queue_lease_locked(&dev, &ifq_idx)))
+		return;
+
+	__netif_mp_close_rxq(dev, ifq_idx, old_p);
+	netif_put_rx_queue_lease_locked(orig_dev, dev);
+}
+
+void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq,
+			      const struct pp_memory_provider_params *p)
+{
+	if (p->mp_ops && p->mp_ops->uninstall)
+		p->mp_ops->uninstall(p->mp_priv, rxq);
+}
+
+/* Clean up memory provider state when a queue lease is torn down. If
+ * a memory provider was installed on the physical queue via the lease,
+ * close it now. The memory provider is a property of the queue itself,
+ * and it was _guaranteed_ to be installed on the physical queue via
+ * the lease redirection. The extra __netif_mp_close_rxq is needed
+ * since the physical queue can outlive the virtual queue in the lease
+ * case, so it needs to be reconfigured to clear the memory provider.
+ */
+void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq,
+			       struct netdev_rx_queue *virt_rxq)
+{
+	struct pp_memory_provider_params *p = &phys_rxq->mp_params;
+	unsigned int ifq_idx = get_netdev_rx_queue_index(phys_rxq);
+
+	if (!p->mp_ops)
+		return;
+
+	__netif_mp_uninstall_rxq(virt_rxq, p);
+	__netif_mp_close_rxq(phys_rxq->dev, ifq_idx, p);
+}
-- 
2.43.0


  parent reply	other threads:[~2026-04-02 23:11 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-02 23:10 [PATCH net-next v11 00/14] netkit: Support for io_uring zero-copy and AF_XDP Daniel Borkmann
2026-04-02 23:10 ` [PATCH net-next v11 01/14] net: Add queue-create operation Daniel Borkmann
2026-04-02 23:10 ` [PATCH net-next v11 02/14] net: Implement netdev_nl_queue_create_doit Daniel Borkmann
2026-04-02 23:10 ` [PATCH net-next v11 03/14] net: Add lease info to queue-get response Daniel Borkmann
2026-04-08  3:40   ` Jakub Kicinski
2026-04-08  9:09     ` Daniel Borkmann
2026-04-08 22:12       ` Jakub Kicinski
2026-04-09 13:43         ` Daniel Borkmann
2026-04-09 13:52           ` Daniel Borkmann
2026-04-09 14:46             ` Jakub Kicinski
2026-04-09 15:32               ` Daniel Borkmann
2026-04-10  1:51                 ` Jakub Kicinski
2026-04-10 11:10                   ` Daniel Borkmann
2026-04-02 23:10 ` [PATCH net-next v11 04/14] net, ethtool: Disallow leased real rxqs to be resized Daniel Borkmann
2026-04-02 23:10 ` [PATCH net-next v11 05/14] net: Slightly simplify net_mp_{open,close}_rxq Daniel Borkmann
2026-04-02 23:10 ` Daniel Borkmann [this message]
2026-04-02 23:10 ` [PATCH net-next v11 07/14] net: Proxy netdev_queue_get_dma_dev for leased queues Daniel Borkmann
2026-04-02 23:10 ` [PATCH net-next v11 08/14] xsk: Extend xsk_rcv_check validation Daniel Borkmann
2026-04-02 23:10 ` [PATCH net-next v11 09/14] xsk: Proxy pool management for leased queues Daniel Borkmann
2026-04-02 23:10 ` [PATCH net-next v11 10/14] netkit: Add single device mode for netkit Daniel Borkmann
2026-04-02 23:10 ` [PATCH net-next v11 11/14] netkit: Implement rtnl_link_ops->alloc and ndo_queue_create Daniel Borkmann
2026-04-02 23:10 ` [PATCH net-next v11 12/14] netkit: Add netkit notifier to check for unregistering devices Daniel Borkmann
2026-04-02 23:10 ` [PATCH net-next v11 13/14] netkit: Add xsk support for af_xdp applications Daniel Borkmann
2026-04-02 23:10 ` [PATCH net-next v11 14/14] selftests/net: Add queue leasing tests with netkit Daniel Borkmann
2026-04-08 23:22   ` Jakub Kicinski
2026-04-09 15:26     ` David Wei
2026-04-10  1:19       ` Jakub Kicinski
2026-04-07  9:50 ` [PATCH net-next v11 00/14] netkit: Support for io_uring zero-copy and AF_XDP Daniel Borkmann
2026-04-10  2:00 ` patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260402231031.447597-7-daniel@iogearbox.net \
    --to=daniel@iogearbox.net \
    --cc=bpf@vger.kernel.org \
    --cc=davem@davemloft.net \
    --cc=dw@davidwei.uk \
    --cc=john.fastabend@gmail.com \
    --cc=jordan@jrife.io \
    --cc=kuba@kernel.org \
    --cc=maciej.fijalkowski@intel.com \
    --cc=magnus.karlsson@intel.com \
    --cc=martin.lau@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=razor@blackwall.org \
    --cc=sdf@fomichev.me \
    --cc=toke@redhat.com \
    --cc=wangdongdong.6@bytedance.com \
    --cc=willemb@google.com \
    --cc=yangzhenze@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.