From: Akihiko Odaki <akihiko.odaki@daynix.com>
To: Jonathan Corbet <corbet@lwn.net>,
Willem de Bruijn <willemdebruijn.kernel@gmail.com>,
Jason Wang <jasowang@redhat.com>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>,
Paolo Abeni <pabeni@redhat.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
Xuan Zhuo <xuanzhuo@linux.alibaba.com>,
Shuah Khan <shuah@kernel.org>,
linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
netdev@vger.kernel.org, kvm@vger.kernel.org,
virtualization@lists.linux-foundation.org,
linux-kselftest@vger.kernel.org,
Yuri Benditovich <yuri.benditovich@daynix.com>,
Andrew Melnychenko <andrew@daynix.com>,
Stephen Hemminger <stephen@networkplumber.org>,
gur.stavi@huawei.com, Lei Yang <leiyang@redhat.com>,
Simon Horman <horms@kernel.org>,
Akihiko Odaki <akihiko.odaki@daynix.com>
Subject: [PATCH net-next v12 05/10] tun: Introduce virtio-net hash feature
Date: Fri, 30 May 2025 13:50:09 +0900 [thread overview]
Message-ID: <20250530-rss-v12-5-95d8b348de91@daynix.com> (raw)
In-Reply-To: <20250530-rss-v12-0-95d8b348de91@daynix.com>
Add ioctls and storage required for the virtio-net hash feature to TUN.
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
drivers/net/Kconfig | 1 +
drivers/net/tun.c | 56 ++++++++++++++++++++++++++++++++++++++++++--------
include/linux/skbuff.h | 3 +++
net/core/skbuff.c | 4 ++++
4 files changed, 56 insertions(+), 8 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 1fd5acdc73c6..aecfd244dd83 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -395,6 +395,7 @@ config TUN
tristate "Universal TUN/TAP device driver support"
depends on INET
select CRC32
+ select SKB_EXTENSIONS
help
TUN/TAP provides packet reception and transmission for user space
programs. It can be viewed as a simple Point-to-Point or Ethernet
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 03d47799e9bd..0a34db248e03 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -209,6 +209,7 @@ struct tun_struct {
struct bpf_prog __rcu *xdp_prog;
struct tun_prog __rcu *steering_prog;
struct tun_prog __rcu *filter_prog;
+ struct tun_vnet_hash __rcu *vnet_hash;
struct ethtool_link_ksettings link_ksettings;
/* init args */
struct file *file;
@@ -451,9 +452,14 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
e->rps_rxhash = hash;
}
+static struct virtio_net_hash *tun_add_hash(struct sk_buff *skb)
+{
+ return skb_ext_add(skb, SKB_EXT_TUN_VNET_HASH);
+}
+
static const struct virtio_net_hash *tun_find_hash(const struct sk_buff *skb)
{
- return NULL;
+ return skb_ext_find(skb, SKB_EXT_TUN_VNET_HASH);
}
/* We try to identify a flow through its rxhash. The reason that
@@ -462,14 +468,21 @@ static const struct virtio_net_hash *tun_find_hash(const struct sk_buff *skb)
* the userspace application move between processors, we may get a
* different rxq no. here.
*/
-static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
+static u16 tun_automq_select_queue(struct tun_struct *tun,
+ const struct tun_vnet_hash *vnet_hash,
+ struct sk_buff *skb)
{
+ struct flow_keys keys;
+ struct flow_keys_basic keys_basic;
struct tun_flow_entry *e;
u32 txq, numqueues;
numqueues = READ_ONCE(tun->numqueues);
- txq = __skb_get_hash_symmetric(skb);
+ memset(&keys, 0, sizeof(keys));
+ skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, 0);
+
+ txq = flow_hash_from_keys(&keys);
e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
if (e) {
tun_flow_save_rps_rxhash(e, txq);
@@ -478,6 +491,13 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
txq = reciprocal_scale(txq, numqueues);
}
+ keys_basic = (struct flow_keys_basic) {
+ .control = keys.control,
+ .basic = keys.basic
+ };
+ tun_vnet_hash_report(vnet_hash, skb, &keys_basic, skb->l4_hash ? skb->hash : txq,
+ tun_add_hash);
+
return txq;
}
@@ -513,8 +533,15 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
u16 ret;
rcu_read_lock();
- if (!tun_ebpf_select_queue(tun, skb, &ret))
- ret = tun_automq_select_queue(tun, skb);
+ if (!tun_ebpf_select_queue(tun, skb, &ret)) {
+ struct tun_vnet_hash *vnet_hash = rcu_dereference(tun->vnet_hash);
+
+ if (vnet_hash && vnet_hash->rss)
+ ret = tun_vnet_rss_select_queue(READ_ONCE(tun->numqueues), vnet_hash,
+ skb, tun_add_hash);
+ else
+ ret = tun_automq_select_queue(tun, vnet_hash, skb);
+ }
rcu_read_unlock();
return ret;
@@ -2235,6 +2262,7 @@ static void tun_free_netdev(struct net_device *dev)
security_tun_dev_free_security(tun->security);
__tun_set_ebpf(tun, &tun->steering_prog, NULL);
__tun_set_ebpf(tun, &tun->filter_prog, NULL);
+ kfree_rcu_mightsleep(rcu_access_pointer(tun->vnet_hash));
}
static void tun_setup(struct net_device *dev)
@@ -3014,16 +3042,22 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
} else {
memset(&ifr, 0, sizeof(ifr));
}
- if (cmd == TUNGETFEATURES) {
+ switch (cmd) {
+ case TUNGETFEATURES:
/* Currently this just means: "what IFF flags are valid?".
* This is needed because we never checked for invalid flags on
* TUNSETIFF.
*/
return put_user(IFF_TUN | IFF_TAP | IFF_NO_CARRIER |
TUN_FEATURES, (unsigned int __user*)argp);
- } else if (cmd == TUNSETQUEUE) {
+
+ case TUNSETQUEUE:
return tun_set_queue(file, &ifr);
- } else if (cmd == SIOCGSKNS) {
+
+ case TUNGETVNETHASHTYPES:
+ return tun_vnet_ioctl_gethashtypes(argp);
+
+ case SIOCGSKNS:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
return open_related_ns(&net->ns, get_net_ns);
@@ -3264,6 +3298,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
ret = open_related_ns(&net->ns, get_net_ns);
break;
+ case TUNSETVNETREPORTINGAUTOMQ:
+ case TUNSETVNETREPORTINGRSS:
+ case TUNSETVNETRSS:
+ ret = tun_vnet_ioctl_sethash(&tun->vnet_hash, cmd, argp);
+ break;
+
default:
ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp);
break;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bb2b751d274a..cdd793f1c360 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4842,6 +4842,9 @@ enum skb_ext_id {
#endif
#if IS_ENABLED(CONFIG_MCTP_FLOWS)
SKB_EXT_MCTP,
+#endif
+#if IS_ENABLED(CONFIG_TUN)
+ SKB_EXT_TUN_VNET_HASH,
#endif
SKB_EXT_NUM, /* must be last */
};
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b1c81687e9d8..75d48217a20f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -64,6 +64,7 @@
#include <linux/mpls.h>
#include <linux/kcov.h>
#include <linux/iov_iter.h>
+#include <linux/virtio_net.h>
#include <net/protocol.h>
#include <net/dst.h>
@@ -4969,6 +4970,9 @@ static const u8 skb_ext_type_len[] = {
#if IS_ENABLED(CONFIG_MCTP_FLOWS)
[SKB_EXT_MCTP] = SKB_EXT_CHUNKSIZEOF(struct mctp_flow),
#endif
+#if IS_ENABLED(CONFIG_TUN)
+ [SKB_EXT_TUN_VNET_HASH] = SKB_EXT_CHUNKSIZEOF(struct virtio_net_hash),
+#endif
};
static __always_inline unsigned int skb_ext_total_length(void)
--
2.49.0
next prev parent reply other threads:[~2025-05-30 4:50 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-30 4:50 [PATCH net-next v12 00/10] tun: Introduce virtio-net hashing feature Akihiko Odaki
2025-05-30 4:50 ` [PATCH net-next v12 01/10] virtio_net: Add functions for hashing Akihiko Odaki
2025-06-03 3:19 ` Jason Wang
2025-06-03 5:31 ` Akihiko Odaki
2025-06-04 1:18 ` Jason Wang
2025-06-04 7:20 ` Akihiko Odaki
2025-06-05 1:53 ` Jason Wang
2025-06-05 7:57 ` Akihiko Odaki
2025-06-06 0:48 ` Jason Wang
2025-06-06 9:10 ` Akihiko Odaki
2025-06-17 3:28 ` Jason Wang
2025-06-21 5:50 ` Akihiko Odaki
2025-06-22 17:40 ` Yuri Benditovich
2025-06-23 8:07 ` Jason Wang
2025-06-23 14:28 ` Yuri Benditovich
2025-06-24 0:49 ` Jason Wang
2025-05-30 4:50 ` [PATCH net-next v12 02/10] net: flow_dissector: Export flow_keys_dissector_symmetric Akihiko Odaki
2025-06-03 3:20 ` Jason Wang
2025-05-30 4:50 ` [PATCH net-next v12 03/10] tun: Allow steering eBPF program to fall back Akihiko Odaki
2025-06-04 1:27 ` Jason Wang
2025-06-04 7:24 ` Akihiko Odaki
2025-06-05 1:55 ` Jason Wang
2025-05-30 4:50 ` [PATCH net-next v12 04/10] tun: Add common virtio-net hash feature code Akihiko Odaki
2025-06-04 1:53 ` Jason Wang
2025-06-04 8:42 ` Akihiko Odaki
2025-06-05 2:46 ` Jason Wang
2025-06-05 8:18 ` Akihiko Odaki
2025-06-06 1:01 ` Jason Wang
2025-06-06 9:26 ` Akihiko Odaki
2025-06-17 3:39 ` Jason Wang
2025-06-19 17:01 ` Akihiko Odaki
2025-06-23 7:59 ` Jason Wang
2025-05-30 4:50 ` Akihiko Odaki [this message]
2025-05-30 4:50 ` [PATCH net-next v12 06/10] tap: Introduce virtio-net hash feature Akihiko Odaki
2025-05-30 4:50 ` [PATCH net-next v12 07/10] selftest: tun: Test vnet ioctls without device Akihiko Odaki
2025-05-30 4:50 ` [PATCH net-next v12 08/10] selftest: tun: Add tests for virtio-net hashing Akihiko Odaki
2025-05-30 4:50 ` [PATCH net-next v12 09/10] selftest: tap: Add tests for virtio-net ioctls Akihiko Odaki
2025-05-30 4:50 ` [PATCH net-next v12 10/10] vhost/net: Support VIRTIO_NET_F_HASH_REPORT Akihiko Odaki
2025-07-16 8:55 ` [PATCH net-next v12 00/10] tun: Introduce virtio-net hashing feature Michael S. Tsirkin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250530-rss-v12-5-95d8b348de91@daynix.com \
--to=akihiko.odaki@daynix.com \
--cc=andrew@daynix.com \
--cc=corbet@lwn.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=gur.stavi@huawei.com \
--cc=horms@kernel.org \
--cc=jasowang@redhat.com \
--cc=kuba@kernel.org \
--cc=kvm@vger.kernel.org \
--cc=leiyang@redhat.com \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=shuah@kernel.org \
--cc=stephen@networkplumber.org \
--cc=virtualization@lists.linux-foundation.org \
--cc=willemdebruijn.kernel@gmail.com \
--cc=xuanzhuo@linux.alibaba.com \
--cc=yuri.benditovich@daynix.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for its NNTP newsgroup(s).