qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
To: qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
	mst@redhat.com, jasowang@redhat.com, peterx@redhat.com,
	ddutile@redhat.com, jgg@nvidia.com, nicolinc@nvidia.com,
	skolothumtho@nvidia.com, joao.m.martins@oracle.com,
	clement.mathieu--drif@eviden.com, kevin.tian@intel.com,
	yi.l.liu@intel.com, chao.p.peng@intel.com,
	Zhenzhong Duan <zhenzhong.duan@intel.com>
Subject: [PATCH v8 17/23] vfio/listener: Bypass readonly region for dirty tracking
Date: Mon, 17 Nov 2025 04:37:20 -0500	[thread overview]
Message-ID: <20251117093729.1121324-18-zhenzhong.duan@intel.com> (raw)
In-Reply-To: <20251117093729.1121324-1-zhenzhong.duan@intel.com>

When doing ditry tracking or calculating dirty tracking range, readonly
regions can be bypassed, because corresponding DMA mappings are readonly
and never become dirty.

This can optimize dirty tracking a bit for passthrough device.

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/vfio/listener.c   | 46 +++++++++++++++++++++++++++++++++-----------
 hw/vfio/trace-events |  1 +
 2 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
index 3b48f6796c..ca2377d860 100644
--- a/hw/vfio/listener.c
+++ b/hw/vfio/listener.c
@@ -76,8 +76,13 @@ static bool vfio_log_sync_needed(const VFIOContainer *bcontainer)
     return true;
 }
 
-static bool vfio_listener_skipped_section(MemoryRegionSection *section)
+static bool vfio_listener_skipped_section(MemoryRegionSection *section,
+                                          bool bypass_ro)
 {
+    if (bypass_ro && section->readonly) {
+        return true;
+    }
+
     return (!memory_region_is_ram(section->mr) &&
             !memory_region_is_iommu(section->mr)) ||
            memory_region_is_protected(section->mr) ||
@@ -368,9 +373,9 @@ static bool vfio_known_safe_misalignment(MemoryRegionSection *section)
 }
 
 static bool vfio_listener_valid_section(MemoryRegionSection *section,
-                                        const char *name)
+                                        bool bypass_ro, const char *name)
 {
-    if (vfio_listener_skipped_section(section)) {
+    if (vfio_listener_skipped_section(section, bypass_ro)) {
         trace_vfio_listener_region_skip(name,
                 section->offset_within_address_space,
                 section->offset_within_address_space +
@@ -497,7 +502,7 @@ void vfio_container_region_add(VFIOContainer *bcontainer,
     int ret;
     Error *err = NULL;
 
-    if (!vfio_listener_valid_section(section, "region_add")) {
+    if (!vfio_listener_valid_section(section, false, "region_add")) {
         return;
     }
 
@@ -663,7 +668,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
     int ret;
     bool try_unmap = true;
 
-    if (!vfio_listener_valid_section(section, "region_del")) {
+    if (!vfio_listener_valid_section(section, false, "region_del")) {
         return;
     }
 
@@ -722,11 +727,11 @@ static void vfio_listener_region_del(MemoryListener *listener,
         }
 
         /*
-         * Fake an IOTLB entry for identity mapping which is needed by dirty
-         * tracking. In fact, in unmap_bitmap, only translated_addr field is
-         * used to set dirty bitmap.
+         * Fake an IOTLB entry for writable identity mapping which is needed
+         * by dirty tracking. In fact, in unmap_bitmap, only translated_addr
+         * field is used to set dirty bitmap.
          */
-        if (!memory_region_is_iommu(section->mr)) {
+        if (!memory_region_is_iommu(section->mr) && !section->readonly) {
             entry.iova = iova;
             entry.translated_addr = iova;
             iotlb = &entry;
@@ -834,7 +839,8 @@ static void vfio_dirty_tracking_update(MemoryListener *listener,
         container_of(listener, VFIODirtyRangesListener, listener);
     hwaddr iova, end;
 
-    if (!vfio_listener_valid_section(section, "tracking_update") ||
+    /* Bypass readonly section as it never becomes dirty */
+    if (!vfio_listener_valid_section(section, true, "tracking_update") ||
         !vfio_get_section_iova_range(dirty->bcontainer, section,
                                      &iova, &end, NULL)) {
         return;
@@ -1093,6 +1099,19 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
     if (!mr) {
         goto out_unlock;
     }
+
+    /*
+     * The mapping is readonly when either it's a readonly mapping in guest
+     * or mapped target is readonly, bypass it for dirty tracking as it
+     * never becomes dirty.
+     */
+    if (!(iotlb->perm & IOMMU_WO) || mr->readonly) {
+        trace_vfio_iommu_map_dirty_notify_skip_ro(iova,
+                                                  iova + iotlb->addr_mask);
+        rcu_read_unlock();
+        return;
+    }
+
     translated_addr = memory_region_get_ram_addr(mr) + xlat;
 
     ret = vfio_container_query_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
@@ -1228,7 +1247,12 @@ static void vfio_listener_log_sync(MemoryListener *listener,
     int ret;
     Error *local_err = NULL;
 
-    if (vfio_listener_skipped_section(section)) {
+    /*
+     * Bypass readonly section as it never becomes dirty, iommu memory section
+     * is RW and never bypassed. The readonly mappings in iommu MR are bypassed
+     * in vfio_iommu_map_dirty_notify().
+     */
+    if (vfio_listener_skipped_section(section, true)) {
         return;
     }
 
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 3c62bab764..180e3d526b 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -103,6 +103,7 @@ vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" -
 vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t min, uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update [0x%"PRIx64" - 0x%"PRIx64"]"
 vfio_device_dirty_tracking_start(int nr_ranges, uint64_t min32, uint64_t max32, uint64_t min64, uint64_t max64, uint64_t minpci, uint64_t maxpci) "nr_ranges %d 32:[0x%"PRIx64" - 0x%"PRIx64"], 64:[0x%"PRIx64" - 0x%"PRIx64"], pci64:[0x%"PRIx64" - 0x%"PRIx64"]"
 vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
+vfio_iommu_map_dirty_notify_skip_ro(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
 
 # container.c
 vfio_container_query_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t backend_flag, uint64_t bitmap_size, uint64_t translated_addr, uint64_t dirty_pages) "iova=0x%"PRIx64" size=0x%"PRIx64" backend_flag=0x%"PRIx64" bitmap_size=0x%"PRIx64" gpa=0x%"PRIx64" dirty_pages=%"PRIu64
-- 
2.47.1



  parent reply	other threads:[~2025-11-17  9:41 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-17  9:37 [PATCH v8 00/23] intel_iommu: Enable first stage translation for passthrough device Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 01/23] intel_iommu: Rename vtd_ce_get_rid2pasid_entry to vtd_ce_get_pasid_entry Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 02/23] intel_iommu: Delete RPS capability related supporting code Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 03/23] intel_iommu: Update terminology to match VTD spec Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 04/23] hw/pci: Export pci_device_get_iommu_bus_devfn() and return bool Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 05/23] hw/pci: Introduce pci_device_get_viommu_flags() Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 06/23] intel_iommu: Implement get_viommu_flags() callback Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 07/23] intel_iommu: Introduce a new structure VTDHostIOMMUDevice Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 08/23] vfio/iommufd: Force creating nesting parent HWPT Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 09/23] intel_iommu_accel: Check for compatibility with IOMMUFD backed device when x-flts=on Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 10/23] intel_iommu_accel: Fail passthrough device under PCI bridge if x-flts=on Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 11/23] intel_iommu_accel: Stick to system MR for IOMMUFD backed host device when x-flts=on Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 12/23] intel_iommu: Add some macros and inline functions Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 13/23] intel_iommu_accel: Bind/unbind guest page table to host Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 14/23] intel_iommu_accel: Propagate PASID-based iotlb invalidation " Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 15/23] intel_iommu: Replay all pasid bindings when either SRTP or TE bit is changed Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 16/23] intel_iommu: Replay pasid bindings after context cache invalidation Zhenzhong Duan
2025-11-17  9:37 ` Zhenzhong Duan [this message]
2025-11-17  9:37 ` [PATCH v8 18/23] intel_iommu: Add migration support with x-flts=on Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 19/23] hw/pci: Introduce pci_device_get_host_iommu_quirks() Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 20/23] intel_iommu_accel: Implement get_host_iommu_quirks() callback Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 21/23] Workaround for ERRATA_772415_SPR17 Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 22/23] intel_iommu: Enable host device when x-flts=on in scalable mode Zhenzhong Duan
2025-11-17  9:37 ` [PATCH v8 23/23] docs/devel: Add IOMMUFD nesting documentation Zhenzhong Duan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251117093729.1121324-18-zhenzhong.duan@intel.com \
    --to=zhenzhong.duan@intel.com \
    --cc=alex@shazbot.org \
    --cc=chao.p.peng@intel.com \
    --cc=clement.mathieu--drif@eviden.com \
    --cc=clg@redhat.com \
    --cc=ddutile@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=joao.m.martins@oracle.com \
    --cc=kevin.tian@intel.com \
    --cc=mst@redhat.com \
    --cc=nicolinc@nvidia.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=skolothumtho@nvidia.com \
    --cc=yi.l.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).