netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Daniel Borkmann <daniel@iogearbox.net>
To: netdev@vger.kernel.org
Cc: bpf@vger.kernel.org, kuba@kernel.org, davem@davemloft.net,
	razor@blackwall.org, pabeni@redhat.com, willemb@google.com,
	sdf@fomichev.me, john.fastabend@gmail.com, martin.lau@kernel.org,
	jordan@jrife.io, maciej.fijalkowski@intel.com,
	magnus.karlsson@intel.com, dw@davidwei.uk, toke@redhat.com,
	yangzhenze@bytedance.com, wangdongdong.6@bytedance.com
Subject: [PATCH net-next v2 13/15] netkit: Implement rtnl_link_ops->alloc and ndo_queue_create
Date: Wed, 15 Oct 2025 16:01:38 +0200	[thread overview]
Message-ID: <20251015140140.62273-14-daniel@iogearbox.net> (raw)
In-Reply-To: <20251015140140.62273-1-daniel@iogearbox.net>

From: David Wei <dw@davidwei.uk>

Implement rtnl_link_ops->alloc that allows the number of rx queues to be
set when netkit is created. By default, netkit has only a single rxq (and
single txq). The number of queues is deliberately not allowed to be changed
via ethtool -L and is fixed for the lifetime of a netkit instance.

For netkit device creation, numrxqueues with larger than one rxq can be
specified. These rxqs are then mappable to real rxqs in physical netdevs:

  ip link add type netkit peer numrxqueues 64      # for device pair
  ip link add numrxqueues 64 type netkit single    # for single device

The limit of numrxqueues for netkit is currently set to 256, which allows
binding multiple real rxqs from physical netdevs.

The implementation of ndo_queue_create() adds a new rxq during the bind
queue operation. We allow to create queues either in single device mode or
for the case of dual device mode for the netkit peer device which gets
placed into the target network namespace. For dual device mode the bind
against the primary device does not make sense for the targeted use cases,
and therefore gets rejected.

Signed-off-by: David Wei <dw@davidwei.uk>
Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/netkit.c | 113 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 105 insertions(+), 8 deletions(-)

diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index 96734828bfb8..31235aa3379a 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -9,11 +9,19 @@
 #include <linux/bpf_mprog.h>
 #include <linux/indirect_call_wrapper.h>
 
+#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
 #include <net/netkit.h>
 #include <net/dst.h>
 #include <net/tcx.h>
 
-#define DRV_NAME "netkit"
+#define NETKIT_DRV_NAME	"netkit"
+
+#define NETKIT_NUM_RX_QUEUES_MAX  256
+#define NETKIT_NUM_TX_QUEUES_MAX  1
+
+#define NETKIT_NUM_RX_QUEUES_REAL 1
+#define NETKIT_NUM_TX_QUEUES_REAL 1
 
 struct netkit {
 	__cacheline_group_begin(netkit_fastpath);
@@ -37,6 +45,8 @@ struct netkit_link {
 	struct net_device *dev;
 };
 
+static struct rtnl_link_ops netkit_link_ops;
+
 static __always_inline int
 netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
 	   enum netkit_action ret)
@@ -243,13 +253,99 @@ static const struct net_device_ops netkit_netdev_ops = {
 static void netkit_get_drvinfo(struct net_device *dev,
 			       struct ethtool_drvinfo *info)
 {
-	strscpy(info->driver, DRV_NAME, sizeof(info->driver));
+	strscpy(info->driver, NETKIT_DRV_NAME, sizeof(info->driver));
+}
+
+static void netkit_get_channels(struct net_device *dev,
+				struct ethtool_channels *channels)
+{
+	channels->max_rx = dev->num_rx_queues;
+	channels->max_tx = dev->num_tx_queues;
+	channels->max_other = 0;
+	channels->max_combined = 1;
+	channels->rx_count = dev->real_num_rx_queues;
+	channels->tx_count = dev->real_num_tx_queues;
+	channels->other_count = 0;
+	channels->combined_count = 0;
 }
 
 static const struct ethtool_ops netkit_ethtool_ops = {
 	.get_drvinfo		= netkit_get_drvinfo,
+	.get_channels		= netkit_get_channels,
 };
 
+static int netkit_queue_create(struct net_device *dev)
+{
+	struct netkit *nk = netkit_priv(dev);
+	u32 rxq_count_old, rxq_count_new;
+	int err;
+
+	rxq_count_old = dev->real_num_rx_queues;
+	rxq_count_new = rxq_count_old + 1;
+
+	/* Only allow to bind in single device mode or to bind against
+	 * the peer device which then ends up in the target netns.
+	 */
+	if (nk->pair == NETKIT_DEVICE_PAIR && nk->primary)
+		return -EOPNOTSUPP;
+
+	if (netif_running(dev))
+		netif_carrier_off(dev);
+	err = netif_set_real_num_rx_queues(dev, rxq_count_new);
+	if (netif_running(dev))
+		netif_carrier_on(dev);
+
+	return err ? err : rxq_count_new;
+}
+
+static const struct netdev_queue_mgmt_ops netkit_queue_mgmt_ops = {
+	.ndo_queue_create = netkit_queue_create,
+};
+
+static struct net_device *netkit_alloc(struct nlattr *tb[],
+				       const char *ifname,
+				       unsigned char name_assign_type,
+				       unsigned int num_tx_queues,
+				       unsigned int num_rx_queues)
+{
+	const struct rtnl_link_ops *ops = &netkit_link_ops;
+	struct net_device *dev;
+
+	if (num_tx_queues > NETKIT_NUM_TX_QUEUES_MAX ||
+	    num_rx_queues > NETKIT_NUM_RX_QUEUES_MAX)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	dev = alloc_netdev_mqs(ops->priv_size, ifname,
+			       name_assign_type, ops->setup,
+			       num_tx_queues, num_rx_queues);
+	if (dev) {
+		dev->real_num_tx_queues = NETKIT_NUM_TX_QUEUES_REAL;
+		dev->real_num_rx_queues = NETKIT_NUM_RX_QUEUES_REAL;
+	}
+	return dev;
+}
+
+static void netkit_queue_unpeer(struct net_device *dev)
+{
+	struct netdev_rx_queue *src_rxq, *dst_rxq;
+	struct net_device *src_dev;
+	int i;
+
+	if (dev->real_num_rx_queues == 1)
+		return;
+	netdev_lock(dev);
+	for (i = 1; i < dev->real_num_rx_queues; i++) {
+		dst_rxq = __netif_get_rx_queue(dev, i);
+		src_rxq = dst_rxq->peer;
+		src_dev = src_rxq->dev;
+
+		netdev_lock(src_dev);
+		netdev_rx_queue_unpeer(src_dev, src_rxq, dst_rxq);
+		netdev_unlock(src_dev);
+	}
+	netdev_unlock(dev);
+}
+
 static void netkit_setup(struct net_device *dev)
 {
 	static const netdev_features_t netkit_features_hw_vlan =
@@ -280,8 +376,9 @@ static void netkit_setup(struct net_device *dev)
 	dev->priv_flags |= IFF_DISABLE_NETPOLL;
 	dev->lltx = true;
 
-	dev->ethtool_ops = &netkit_ethtool_ops;
-	dev->netdev_ops  = &netkit_netdev_ops;
+	dev->netdev_ops     = &netkit_netdev_ops;
+	dev->ethtool_ops    = &netkit_ethtool_ops;
+	dev->queue_mgmt_ops = &netkit_queue_mgmt_ops;
 
 	dev->features |= netkit_features;
 	dev->hw_features = netkit_features;
@@ -330,8 +427,6 @@ static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
 	return 0;
 }
 
-static struct rtnl_link_ops netkit_link_ops;
-
 static int netkit_new_link(struct net_device *dev,
 			   struct rtnl_newlink_params *params,
 			   struct netlink_ext_ack *extack)
@@ -865,6 +960,7 @@ static void netkit_release_all(struct net_device *dev)
 static void netkit_uninit(struct net_device *dev)
 {
 	netkit_release_all(dev);
+	netkit_queue_unpeer(dev);
 }
 
 static void netkit_del_link(struct net_device *dev, struct list_head *head)
@@ -1005,8 +1101,9 @@ static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = {
 };
 
 static struct rtnl_link_ops netkit_link_ops = {
-	.kind		= DRV_NAME,
+	.kind		= NETKIT_DRV_NAME,
 	.priv_size	= sizeof(struct netkit),
+	.alloc		= netkit_alloc,
 	.setup		= netkit_setup,
 	.newlink	= netkit_new_link,
 	.dellink	= netkit_del_link,
@@ -1042,4 +1139,4 @@ MODULE_DESCRIPTION("BPF-programmable network device");
 MODULE_AUTHOR("Daniel Borkmann <daniel@iogearbox.net>");
 MODULE_AUTHOR("Nikolay Aleksandrov <razor@blackwall.org>");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS_RTNL_LINK(DRV_NAME);
+MODULE_ALIAS_RTNL_LINK(NETKIT_DRV_NAME);
-- 
2.43.0


  parent reply	other threads:[~2025-10-15 14:02 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-15 14:01 [PATCH net-next v2 00/15] netkit: Support for io_uring zero-copy and AF_XDP Daniel Borkmann
2025-10-15 14:01 ` [PATCH net-next v2 01/15] net: Add bind-queue operation Daniel Borkmann
2025-10-15 14:01 ` [PATCH net-next v2 02/15] net: Implement netdev_nl_bind_queue_doit Daniel Borkmann
2025-10-15 14:01 ` [PATCH net-next v2 03/15] net: Add peer info to queue-get response Daniel Borkmann
2025-10-15 14:01 ` [PATCH net-next v2 04/15] net, ethtool: Disallow peered real rxqs to be resized Daniel Borkmann
2025-10-15 14:01 ` [PATCH net-next v2 05/15] net: Proxy net_mp_{open,close}_rxq for mapped queues Daniel Borkmann
2025-10-15 14:01 ` [PATCH net-next v2 06/15] xsk: Move NETDEV_XDP_ACT_ZC into generic header Daniel Borkmann
2025-10-15 14:01 ` [PATCH net-next v2 07/15] xsk: Move pool registration into single function Daniel Borkmann
2025-10-15 14:01 ` [PATCH net-next v2 08/15] xsk: Add small helper xp_pool_bindable Daniel Borkmann
2025-10-15 14:01 ` [PATCH net-next v2 09/15] xsk: Change xsk_rcv_check to check netdev/queue_id from pool Daniel Borkmann
2025-10-15 14:01 ` [PATCH net-next v2 10/15] xsk: Proxy pool management for mapped queues Daniel Borkmann
2025-10-15 14:01 ` [PATCH net-next v2 11/15] netkit: Add single device mode for netkit Daniel Borkmann
2025-10-15 14:01 ` [PATCH net-next v2 12/15] netkit: Document fast vs slowpath members via macros Daniel Borkmann
2025-10-15 14:01 ` Daniel Borkmann [this message]
2025-10-15 14:01 ` [PATCH net-next v2 14/15] netkit: Add io_uring zero-copy support for TCP Daniel Borkmann
2025-10-15 14:01 ` [PATCH net-next v2 15/15] netkit: Add xsk support for af_xdp applications Daniel Borkmann
2025-10-15 18:46 ` [syzbot ci] Re: netkit: Support for io_uring zero-copy and AF_XDP syzbot ci
2025-10-15 19:13   ` Daniel Borkmann
2025-10-16 12:12     ` Daniel Borkmann

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251015140140.62273-14-daniel@iogearbox.net \
    --to=daniel@iogearbox.net \
    --cc=bpf@vger.kernel.org \
    --cc=davem@davemloft.net \
    --cc=dw@davidwei.uk \
    --cc=john.fastabend@gmail.com \
    --cc=jordan@jrife.io \
    --cc=kuba@kernel.org \
    --cc=maciej.fijalkowski@intel.com \
    --cc=magnus.karlsson@intel.com \
    --cc=martin.lau@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=razor@blackwall.org \
    --cc=sdf@fomichev.me \
    --cc=toke@redhat.com \
    --cc=wangdongdong.6@bytedance.com \
    --cc=willemb@google.com \
    --cc=yangzhenze@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).