linux-doc.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Akihiko Odaki <akihiko.odaki@daynix.com>
To: Jonathan Corbet <corbet@lwn.net>,
	 Willem de Bruijn <willemdebruijn.kernel@gmail.com>,
	 Jason Wang <jasowang@redhat.com>,
	"David S. Miller" <davem@davemloft.net>,
	 Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>,
	 Paolo Abeni <pabeni@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	 Xuan Zhuo <xuanzhuo@linux.alibaba.com>,
	Shuah Khan <shuah@kernel.org>,
	 linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	 netdev@vger.kernel.org, kvm@vger.kernel.org,
	 virtualization@lists.linux-foundation.org,
	linux-kselftest@vger.kernel.org,
	 Yuri Benditovich <yuri.benditovich@daynix.com>,
	 Andrew Melnychenko <andrew@daynix.com>,
	 Stephen Hemminger <stephen@networkplumber.org>,
	gur.stavi@huawei.com,  Lei Yang <leiyang@redhat.com>,
	Simon Horman <horms@kernel.org>,
	 Akihiko Odaki <akihiko.odaki@daynix.com>
Subject: [PATCH net-next v12 06/10] tap: Introduce virtio-net hash feature
Date: Fri, 30 May 2025 13:50:10 +0900	[thread overview]
Message-ID: <20250530-rss-v12-6-95d8b348de91@daynix.com> (raw)
In-Reply-To: <20250530-rss-v12-0-95d8b348de91@daynix.com>

Add ioctls and storage required for the virtio-net hash feature to TAP.

Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
 drivers/net/ipvlan/ipvtap.c |  2 +-
 drivers/net/macvtap.c       |  2 +-
 drivers/net/tap.c           | 72 +++++++++++++++++++++++++++++++++++++++++----
 include/linux/if_tap.h      |  4 ++-
 4 files changed, 71 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c
index 1afc4c47be73..305438abf7ae 100644
--- a/drivers/net/ipvlan/ipvtap.c
+++ b/drivers/net/ipvlan/ipvtap.c
@@ -114,7 +114,7 @@ static void ipvtap_dellink(struct net_device *dev,
 	struct ipvtap_dev *vlan = netdev_priv(dev);
 
 	netdev_rx_handler_unregister(dev);
-	tap_del_queues(&vlan->tap);
+	tap_del(&vlan->tap);
 	ipvlan_link_delete(dev, head);
 }
 
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 29a5929d48e5..e72144d05ef4 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -122,7 +122,7 @@ static void macvtap_dellink(struct net_device *dev,
 	struct macvtap_dev *vlantap = netdev_priv(dev);
 
 	netdev_rx_handler_unregister(dev);
-	tap_del_queues(&vlantap->tap);
+	tap_del(&vlantap->tap);
 	macvlan_dellink(dev, head);
 }
 
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 25c60ff2d3f2..15f056d7f632 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -49,6 +49,10 @@ struct major_info {
 	struct list_head next;
 };
 
+struct tap_skb_cb {
+	struct virtio_net_hash hash;
+};
+
 #define GOODCOPY_LEN 128
 
 static const struct proto_ops tap_socket_ops;
@@ -179,9 +183,20 @@ static void tap_put_queue(struct tap_queue *q)
 	sock_put(&q->sk);
 }
 
+static struct tap_skb_cb *tap_skb_cb(const struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct tap_skb_cb));
+	return (struct tap_skb_cb *)skb->cb;
+}
+
+static struct virtio_net_hash *tap_add_hash(struct sk_buff *skb)
+{
+	return &tap_skb_cb(skb)->hash;
+}
+
 static const struct virtio_net_hash *tap_find_hash(const struct sk_buff *skb)
 {
-	return NULL;
+	return &tap_skb_cb(skb)->hash;
 }
 
 /*
@@ -194,6 +209,7 @@ static const struct virtio_net_hash *tap_find_hash(const struct sk_buff *skb)
 static struct tap_queue *tap_get_queue(struct tap_dev *tap,
 				       struct sk_buff *skb)
 {
+	struct flow_keys_basic keys_basic;
 	struct tap_queue *queue = NULL;
 	/* Access to taps array is protected by rcu, but access to numvtaps
 	 * isn't. Below we use it to lookup a queue, but treat it as a hint
@@ -201,17 +217,47 @@ static struct tap_queue *tap_get_queue(struct tap_dev *tap,
 	 * racing against queue removal.
 	 */
 	int numvtaps = READ_ONCE(tap->numvtaps);
+	struct tun_vnet_hash *vnet_hash = rcu_dereference(tap->vnet_hash);
 	__u32 rxq;
 
+	*tap_skb_cb(skb) = (struct tap_skb_cb) {
+		.hash = { .report = VIRTIO_NET_HASH_REPORT_NONE }
+	};
+
 	if (!numvtaps)
 		goto out;
 
 	if (numvtaps == 1)
 		goto single;
 
+	if (vnet_hash) {
+		if (vnet_hash->rss) {
+			rxq = tun_vnet_rss_select_queue(numvtaps, vnet_hash, skb, tap_add_hash);
+			queue = rcu_dereference(tap->taps[rxq]);
+			goto out;
+		}
+
+		if (!skb->l4_hash && !skb->sw_hash) {
+			struct flow_keys keys;
+
+			skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+			rxq = flow_hash_from_keys(&keys);
+			keys_basic = (struct flow_keys_basic) {
+				.control = keys.control,
+				.basic = keys.basic
+			};
+		} else {
+			skb_flow_dissect_flow_keys_basic(NULL, skb, &keys_basic, NULL, 0, 0, 0,
+							 FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+			rxq = skb->hash;
+		}
+	} else {
+		rxq = skb_get_hash(skb);
+	}
+
 	/* Check if we can use flow to select a queue */
-	rxq = skb_get_hash(skb);
 	if (rxq) {
+		tun_vnet_hash_report(vnet_hash, skb, &keys_basic, rxq, tap_add_hash);
 		queue = rcu_dereference(tap->taps[rxq % numvtaps]);
 		goto out;
 	}
@@ -234,10 +280,10 @@ static struct tap_queue *tap_get_queue(struct tap_dev *tap,
 
 /*
  * The net_device is going away, give up the reference
- * that it holds on all queues and safely set the pointer
- * from the queues to NULL.
+ * that it holds on all queues, safely set the pointer
+ * from the queues to NULL, and free vnet_hash.
  */
-void tap_del_queues(struct tap_dev *tap)
+void tap_del(struct tap_dev *tap)
 {
 	struct tap_queue *q, *tmp;
 
@@ -254,8 +300,10 @@ void tap_del_queues(struct tap_dev *tap)
 	BUG_ON(tap->numqueues);
 	/* guarantee that any future tap_set_queue will fail */
 	tap->numvtaps = MAX_TAP_QUEUES;
+
+	kfree_rcu_mightsleep(rtnl_dereference(tap->vnet_hash));
 }
-EXPORT_SYMBOL_GPL(tap_del_queues);
+EXPORT_SYMBOL_GPL(tap_del);
 
 rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
 {
@@ -998,6 +1046,18 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
 		rtnl_unlock();
 		return ret;
 
+	case TUNGETVNETHASHTYPES:
+		return tun_vnet_ioctl_gethashtypes(argp);
+
+	case TUNSETVNETREPORTINGAUTOMQ:
+	case TUNSETVNETREPORTINGRSS:
+	case TUNSETVNETRSS:
+		rtnl_lock();
+		tap = rtnl_dereference(q->tap);
+		ret = tap ? tun_vnet_ioctl_sethash(&tap->vnet_hash, cmd, argp) : -EBADFD;
+		rtnl_unlock();
+		return ret;
+
 	case SIOCGIFHWADDR:
 		rtnl_lock();
 		tap = tap_get_tap_dev(q);
diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h
index 553552fa635c..6647a7a9e956 100644
--- a/include/linux/if_tap.h
+++ b/include/linux/if_tap.h
@@ -31,6 +31,7 @@ static inline struct ptr_ring *tap_get_ptr_ring(struct file *f)
 #define MAX_TAP_QUEUES 256
 
 struct tap_queue;
+struct tun_vnet_hash;
 
 struct tap_dev {
 	struct net_device	*dev;
@@ -43,6 +44,7 @@ struct tap_dev {
 	int			numqueues;
 	netdev_features_t	tap_features;
 	int			minor;
+	struct tun_vnet_hash __rcu *vnet_hash;
 
 	void (*update_features)(struct tap_dev *tap, netdev_features_t features);
 	void (*count_tx_dropped)(struct tap_dev *tap);
@@ -74,7 +76,7 @@ struct tap_queue {
 };
 
 rx_handler_result_t tap_handle_frame(struct sk_buff **pskb);
-void tap_del_queues(struct tap_dev *tap);
+void tap_del(struct tap_dev *tap);
 int tap_get_minor(dev_t major, struct tap_dev *tap);
 void tap_free_minor(dev_t major, struct tap_dev *tap);
 int tap_queue_resize(struct tap_dev *tap);

-- 
2.49.0


  parent reply	other threads:[~2025-05-30  4:50 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-30  4:50 [PATCH net-next v12 00/10] tun: Introduce virtio-net hashing feature Akihiko Odaki
2025-05-30  4:50 ` [PATCH net-next v12 01/10] virtio_net: Add functions for hashing Akihiko Odaki
2025-06-03  3:19   ` Jason Wang
2025-06-03  5:31     ` Akihiko Odaki
2025-06-04  1:18       ` Jason Wang
2025-06-04  7:20         ` Akihiko Odaki
2025-06-05  1:53           ` Jason Wang
2025-06-05  7:57             ` Akihiko Odaki
2025-06-06  0:48               ` Jason Wang
2025-06-06  9:10                 ` Akihiko Odaki
2025-06-17  3:28                   ` Jason Wang
2025-06-21  5:50                     ` Akihiko Odaki
2025-06-22 17:40                     ` Yuri Benditovich
2025-06-23  8:07                       ` Jason Wang
2025-06-23 14:28                         ` Yuri Benditovich
2025-06-24  0:49                           ` Jason Wang
2025-05-30  4:50 ` [PATCH net-next v12 02/10] net: flow_dissector: Export flow_keys_dissector_symmetric Akihiko Odaki
2025-06-03  3:20   ` Jason Wang
2025-05-30  4:50 ` [PATCH net-next v12 03/10] tun: Allow steering eBPF program to fall back Akihiko Odaki
2025-06-04  1:27   ` Jason Wang
2025-06-04  7:24     ` Akihiko Odaki
2025-06-05  1:55       ` Jason Wang
2025-05-30  4:50 ` [PATCH net-next v12 04/10] tun: Add common virtio-net hash feature code Akihiko Odaki
2025-06-04  1:53   ` Jason Wang
2025-06-04  8:42     ` Akihiko Odaki
2025-06-05  2:46       ` Jason Wang
2025-06-05  8:18         ` Akihiko Odaki
2025-06-06  1:01           ` Jason Wang
2025-06-06  9:26             ` Akihiko Odaki
2025-06-17  3:39               ` Jason Wang
2025-06-19 17:01                 ` Akihiko Odaki
2025-06-23  7:59                   ` Jason Wang
2025-05-30  4:50 ` [PATCH net-next v12 05/10] tun: Introduce virtio-net hash feature Akihiko Odaki
2025-05-30  4:50 ` Akihiko Odaki [this message]
2025-05-30  4:50 ` [PATCH net-next v12 07/10] selftest: tun: Test vnet ioctls without device Akihiko Odaki
2025-05-30  4:50 ` [PATCH net-next v12 08/10] selftest: tun: Add tests for virtio-net hashing Akihiko Odaki
2025-05-30  4:50 ` [PATCH net-next v12 09/10] selftest: tap: Add tests for virtio-net ioctls Akihiko Odaki
2025-05-30  4:50 ` [PATCH net-next v12 10/10] vhost/net: Support VIRTIO_NET_F_HASH_REPORT Akihiko Odaki
2025-07-16  8:55 ` [PATCH net-next v12 00/10] tun: Introduce virtio-net hashing feature Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250530-rss-v12-6-95d8b348de91@daynix.com \
    --to=akihiko.odaki@daynix.com \
    --cc=andrew@daynix.com \
    --cc=corbet@lwn.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=gur.stavi@huawei.com \
    --cc=horms@kernel.org \
    --cc=jasowang@redhat.com \
    --cc=kuba@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=leiyang@redhat.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=shuah@kernel.org \
    --cc=stephen@networkplumber.org \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=willemdebruijn.kernel@gmail.com \
    --cc=xuanzhuo@linux.alibaba.com \
    --cc=yuri.benditovich@daynix.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).