qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: qemu-devel@nongnu.org
Cc: Peter Maydell <peter.maydell@linaro.org>,
	Paolo Abeni <pabeni@redhat.com>,
	Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>,
	Jason Wang <jasowang@redhat.com>, Lei Yang <leiyang@redhat.com>,
	Stefano Garzarella <sgarzare@redhat.com>
Subject: [PULL 14/75] net: implement UDP tunnel features offloading
Date: Sun, 5 Oct 2025 15:16:32 -0400	[thread overview]
Message-ID: <a5289563ad74a2a37e8d2101d82935454c71fef4.1759691708.git.mst@redhat.com> (raw)
In-Reply-To: <cover.1759691708.git.mst@redhat.com>

From: Paolo Abeni <pabeni@redhat.com>

When any host or guest GSO over UDP tunnel offload is enabled, the
virtio net header includes the additional tunnel-related fields;
update the header size accordingly.

Push the GSO over UDP tunnel offloads all the way down to the tap
device by extending the newly introduced NetOffloads struct, and
eventually enable the associated features.

As per the virtio specification, to convert feature bits to offload
bits, map the extended features into the reserved range.

Finally, make the vhost backend aware of the exact header layout, so
that it can copy the header correctly. The tunnel-related fields are
present if either the guest or the host negotiated any UDP tunnel
related feature: add those features to the kernel supported features
list, to allow QEMU to transfer the needed information to the backend.

Reviewed-by: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Tested-by: Lei Yang <leiyang@redhat.com>
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-ID: <093b4bc68368046bffbcab2202227632d6e4e83b.1758549625.git.pabeni@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/net/net.h   |  2 ++
 hw/net/virtio-net.c | 34 ++++++++++++++++++++++++++--------
 net/net.c           |  3 ++-
 net/tap-linux.c     |  6 ++++++
 net/tap.c           |  2 ++
 5 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/include/net/net.h b/include/net/net.h
index 9a9084690d..72b476ee1d 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -43,6 +43,8 @@ typedef struct NetOffloads {
     bool ufo;
     bool uso4;
     bool uso6;
+    bool tnl;
+    bool tnl_csum;
 } NetOffloads;
 
 #define DEFINE_NIC_PROPERTIES(_state, _conf)                            \
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index f8e2b4823e..33116712eb 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -103,6 +103,12 @@
 #define VIRTIO_NET_F2O_SHIFT          (VIRTIO_NET_OFFLOAD_MAP_MIN - \
                                        VIRTIO_NET_FEATURES_MAP_MIN + 64)
 
+static bool virtio_has_tunnel_hdr(const uint64_t *features)
+{
+    return virtio_has_feature_ex(features, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO) ||
+           virtio_has_feature_ex(features, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO);
+}
+
 static const VirtIOFeature feature_sizes[] = {
     {.flags = 1ULL << VIRTIO_NET_F_MAC,
      .end = endof(struct virtio_net_config, mac)},
@@ -659,7 +665,8 @@ static bool peer_has_tunnel(VirtIONet *n)
 }
 
 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
-                                       int version_1, int hash_report)
+                                       int version_1, int hash_report,
+                                       int tunnel)
 {
     int i;
     NetClientState *nc;
@@ -667,9 +674,11 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
     n->mergeable_rx_bufs = mergeable_rx_bufs;
 
     if (version_1) {
-        n->guest_hdr_len = hash_report ?
-            sizeof(struct virtio_net_hdr_v1_hash) :
-            sizeof(struct virtio_net_hdr_mrg_rxbuf);
+        n->guest_hdr_len = tunnel ?
+            sizeof(struct virtio_net_hdr_v1_hash_tunnel) :
+            (hash_report ?
+             sizeof(struct virtio_net_hdr_v1_hash) :
+             sizeof(struct virtio_net_hdr_mrg_rxbuf));
         n->rss_data.populate_hash = !!hash_report;
     } else {
         n->guest_hdr_len = n->mergeable_rx_bufs ?
@@ -803,6 +812,10 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
        .ufo  = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
        .uso4 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
        .uso6 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)),
+       .tnl  = !!(n->curr_guest_offloads &
+                  (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED)),
+       .tnl_csum = !!(n->curr_guest_offloads &
+                      (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED)),
     };
 
     qemu_set_offload(qemu_get_queue(n->nic)->peer, &ol);
@@ -824,7 +837,9 @@ virtio_net_guest_offloads_by_features(const uint64_t *features)
         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
         (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
         (1ULL << VIRTIO_NET_F_GUEST_USO4) |
-        (1ULL << VIRTIO_NET_F_GUEST_USO6);
+        (1ULL << VIRTIO_NET_F_GUEST_USO6) |
+        (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED) |
+        (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED);
 
     return guest_offloads_mask & virtio_net_features_to_offload(features);
 }
@@ -937,7 +952,8 @@ static void virtio_net_set_features(VirtIODevice *vdev,
                                virtio_has_feature_ex(features,
                                                   VIRTIO_F_VERSION_1),
                                virtio_has_feature_ex(features,
-                                                  VIRTIO_NET_F_HASH_REPORT));
+                                                  VIRTIO_NET_F_HASH_REPORT),
+                               virtio_has_tunnel_hdr(features));
 
     n->rsc4_enabled = virtio_has_feature_ex(features, VIRTIO_NET_F_RSC_EXT) &&
         virtio_has_feature_ex(features, VIRTIO_NET_F_GUEST_TSO4);
@@ -3169,13 +3185,15 @@ static int virtio_net_post_load_device(void *opaque, int version_id)
     VirtIONet *n = opaque;
     VirtIODevice *vdev = VIRTIO_DEVICE(n);
     int i, link_down;
+    bool has_tunnel_hdr = virtio_has_tunnel_hdr(vdev->guest_features_ex);
 
     trace_virtio_net_post_load_device();
     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                                virtio_vdev_has_feature(vdev,
                                                        VIRTIO_F_VERSION_1),
                                virtio_vdev_has_feature(vdev,
-                                                       VIRTIO_NET_F_HASH_REPORT));
+                                                      VIRTIO_NET_F_HASH_REPORT),
+                               has_tunnel_hdr);
 
     /* MAC_TABLE_ENTRIES may be different from the saved image */
     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
@@ -3995,7 +4013,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
 
     n->vqs[0].tx_waiting = 0;
     n->tx_burst = n->net_conf.txburst;
-    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
+    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0, 0);
     n->promisc = 1; /* for compatibility */
 
     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
diff --git a/net/net.c b/net/net.c
index 9536184a0c..27e0d27807 100644
--- a/net/net.c
+++ b/net/net.c
@@ -575,7 +575,8 @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
 
     assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
            len == sizeof(struct virtio_net_hdr) ||
-           len == sizeof(struct virtio_net_hdr_v1_hash));
+           len == sizeof(struct virtio_net_hdr_v1_hash) ||
+           len == sizeof(struct virtio_net_hdr_v1_hash_tunnel));
 
     nc->vnet_hdr_len = len;
     nc->info->set_vnet_hdr_len(nc, len);
diff --git a/net/tap-linux.c b/net/tap-linux.c
index 98b0ae9602..2a90b58467 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -284,6 +284,12 @@ void tap_fd_set_offload(int fd, const NetOffloads *ol)
         if (ol->uso6) {
             offload |= TUN_F_USO6;
         }
+        if (ol->tnl) {
+            offload |= TUN_F_UDP_TUNNEL_GSO;
+        }
+        if (ol->tnl_csum) {
+            offload |= TUN_F_UDP_TUNNEL_GSO_CSUM;
+        }
     }
 
     if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {
diff --git a/net/tap.c b/net/tap.c
index 5124372316..abe3b2d036 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -62,6 +62,8 @@ static const int kernel_feature_bits[] = {
     VIRTIO_F_NOTIFICATION_DATA,
     VIRTIO_NET_F_RSC_EXT,
     VIRTIO_NET_F_HASH_REPORT,
+    VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO,
+    VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO,
     VHOST_INVALID_FEATURE_BIT
 };
 
-- 
MST



  parent reply	other threads:[~2025-10-05 19:18 UTC|newest]

Thread overview: 84+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-05 19:16 [PULL 00/75] virtio,pci,pc: features, fixes Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 01/75] net: bundle all offloads in a single struct Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 02/75] linux-headers: deal with counted_by annotation Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 03/75] linux-headers: Update to Linux v6.17-rc1 Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 04/75] virtio: introduce extended features type Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 05/75] virtio: serialize extended features state Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 06/75] virtio: add support for negotiating extended features Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 07/75] virtio-pci: implement support for " Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 08/75] vhost: add support for negotiating " Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 09/75] qmp: update virtio features map to support " Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 10/75] vhost-backend: implement extended features support Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 11/75] vhost-net: " Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 12/75] virtio-net: " Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 13/75] net: implement tunnel probing Michael S. Tsirkin
2025-10-05 19:16 ` Michael S. Tsirkin [this message]
2025-10-05 19:16 ` [PULL 15/75] Revert "hw/acpi/ghes: Make ghes_record_cper_errors() static" Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 16/75] acpi/ghes: Cleanup the code which gets ghes ged state Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 17/75] acpi/ghes: prepare to change the way HEST offsets are calculated Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 18/75] acpi/ghes: add a firmware file with HEST address Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 19/75] acpi/ghes: Use HEST table offsets when preparing GHES records Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 20/75] acpi/ghes: don't hard-code the number of sources for HEST table Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 21/75] acpi/ghes: add a notifier to notify when error data is ready Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 22/75] acpi/generic_event_device: Update GHES migration to cover hest addr Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 23/75] acpi/generic_event_device: add logic to detect if HEST addr is available Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 24/75] acpi/generic_event_device: add an APEI error device Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 25/75] tests/acpi: virt: allow acpi table changes at DSDT and HEST tables Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 26/75] arm/virt: Wire up a GED error device for ACPI / GHES Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 27/75] qapi/acpi-hest: add an interface to do generic CPER error injection Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 28/75] acpi/generic_event_device.c: enable use_hest_addr for QEMU 10.x Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 29/75] tests/acpi: virt: update HEST and DSDT tables Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 30/75] docs: hest: add new "etc/acpi_table_hest_addr" and update workflow Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 31/75] scripts/ghes_inject: add a script to generate GHES error inject Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 32/75] hw/smbios: allow clearing the VM bit in SMBIOS table 0 Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 33/75] hw/i386/pc: Avoid overlap between CXL window and PCI 64bit BARs in QEMU Michael S. Tsirkin
2025-10-06 17:08   ` Michael Tokarev
2025-10-05 19:17 ` [PULL 34/75] pcie_sriov: Fix broken MMIO accesses from SR-IOV VFs Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 35/75] hw/virtio: rename vhost-user-device and make user creatable Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 36/75] smbios: cap DIMM size to 2Tb as workaround for broken Windows Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 37/75] pcie: Add a way to get the outstanding page request allocation (pri) from the config space Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 38/75] intel_iommu: Bypass barrier wait descriptor Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 39/75] intel_iommu: Declare PRI constants and structures Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 40/75] intel_iommu: Declare registers for PRI Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 41/75] intel_iommu: Add PRI operations support Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 42/75] x86: ich9: fix default value of 'No Reboot' bit in GCS Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 43/75] vhost: use virtio_config_get_guest_notifier() Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 44/75] virtio: unify virtio_notify_irqfd() and virtio_notify() Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 45/75] virtio: support irqfd in virtio_notify_config() Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 46/75] tests/libqos: extract qvirtqueue_set_avail_idx() Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 47/75] tests/virtio-scsi: add a virtio_error() IOThread test Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 48/75] pcie_sriov: make pcie_sriov_pf_exit() safe on non-SR-IOV devices Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 49/75] virtio: Add function name to error messages Michael S. Tsirkin
2025-10-05 20:13   ` Alessandro Ratti
2025-10-05 20:24     ` Michael S. Tsirkin
2025-10-08 10:01     ` Michael S. Tsirkin
2025-10-08 16:53       ` Alessandro Ratti
2025-10-05 20:19   ` [PULL v2 75/75] virtio: improve virtqueue mapping " Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 50/75] memory: Adjust event ranges to fit within notifier boundaries Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 51/75] amd_iommu: Document '-device amd-iommu' common options Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 52/75] amd_iommu: Reorder device and page table helpers Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 53/75] amd_iommu: Helper to decode size of page invalidation command Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 54/75] amd_iommu: Add helper function to extract the DTE Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 55/75] amd_iommu: Return an error when unable to read PTE from guest memory Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 56/75] amd_iommu: Add helpers to walk AMD v1 Page Table format Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 57/75] amd_iommu: Add a page walker to sync shadow page tables on invalidation Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 58/75] amd_iommu: Add basic structure to support IOMMU notifier updates Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 59/75] amd_iommu: Sync shadow page tables on page invalidation Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 60/75] amd_iommu: Use iova_tree records to determine large page size on UNMAP Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 61/75] amd_iommu: Unmap all address spaces under the AMD IOMMU on reset Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 62/75] amd_iommu: Add replay callback Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 63/75] amd_iommu: Invalidate address translations on INVALIDATE_IOMMU_ALL Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 64/75] amd_iommu: Toggle memory regions based on address translation mode Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 65/75] amd_iommu: Set all address spaces to use passthrough mode on reset Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 66/75] amd_iommu: Add dma-remap property to AMD vIOMMU device Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 67/75] amd_iommu: Toggle address translation mode on devtab entry invalidation Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 68/75] amd_iommu: Do not assume passthrough translation when DTE[TV]=0 Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 69/75] amd_iommu: Refactor amdvi_page_walk() to use common code for page walk Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 70/75] intel-iommu: Move dma_translation to x86-iommu Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 71/75] amd_iommu: HATDis/HATS=11 support Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 72/75] vdpa-dev: add get_vhost() callback for vhost-vdpa device Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 73/75] intel_iommu: Enable Enhanced Set Root Table Pointer Support (ESRTPS) Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 74/75] intel_iommu: Simplify caching mode check with VFIO device Michael S. Tsirkin
2025-10-05 19:18 ` [PULL 75/75] pci: Fix wrong parameter passing to pci_device_get_iommu_bus_devfn() Michael S. Tsirkin
2025-10-05 20:20 ` [PULL 00/75] virtio,pci,pc: features, fixes Michael S. Tsirkin
2025-10-06 21:59 ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a5289563ad74a2a37e8d2101d82935454c71fef4.1759691708.git.mst@redhat.com \
    --to=mst@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=leiyang@redhat.com \
    --cc=odaki@rsg.ci.i.u-tokyo.ac.jp \
    --cc=pabeni@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=sgarzare@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).