qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Eric Auger <eric.auger@redhat.com>
To: eric.auger.pro@gmail.com, eric.auger@redhat.com,
	qemu-devel@nongnu.org, qemu-arm@nongnu.org, stefanha@redhat.com,
	fam@euphon.net, philmd@redhat.com, alex.williamson@redhat.com
Cc: lvivier@redhat.com, kwolf@redhat.com, cohuck@redhat.com,
	mreitz@redhat.com
Subject: [RFC 2/3] util/vfio-helpers: Dynamically compute the min/max IOVA
Date: Fri, 25 Sep 2020 15:48:44 +0200	[thread overview]
Message-ID: <20200925134845.21053-3-eric.auger@redhat.com> (raw)
In-Reply-To: <20200925134845.21053-1-eric.auger@redhat.com>

Currently the min/max IOVA are hardcoded to [0x10000, 1 << 39].
Now that we dynamically fetch the usable IOVA ranges from VFIO,
use the retrieved values whenever the kernel reports them, and
fall back to the hardcoded limits otherwise.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
 util/vfio-helpers.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 8e91beba95..567bcf1ded 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -26,11 +26,11 @@
 
 #define QEMU_VFIO_DEBUG 0
 
+/*
+ * Min/Max IOVA addresses, only used if VFIO does not report
+ * the usable IOVA ranges
+ */
 #define QEMU_VFIO_IOVA_MIN 0x10000ULL
-/* XXX: Once VFIO exposes the iova bit width in the IOMMU capability interface,
- * we can use a runtime limit; alternatively it's also possible to do platform
- * specific detection by reading sysfs entries. Until then, 39 is a safe bet.
- **/
 #define QEMU_VFIO_IOVA_MAX (1ULL << 39)
 
 typedef struct {
@@ -56,6 +56,8 @@ struct QEMUVFIOState {
     struct vfio_region_info config_region_info, bar_region_info[6];
     struct IOVARange *usable_iova_ranges;
     uint8_t nb_iova_ranges;
+    uint64_t max_iova;
+    uint64_t min_iova;
 
     /* These fields are protected by @lock */
     /* VFIO's IO virtual address space is managed by splitting into a few
@@ -63,7 +65,7 @@ struct QEMUVFIOState {
      *
      * ---------------       <= 0
      * |xxxxxxxxxxxxx|
-     * |-------------|       <= QEMU_VFIO_IOVA_MIN
+     * |-------------|       <= min_iova
      * |             |
      * |    Fixed    |
      * |             |
@@ -75,20 +77,20 @@ struct QEMUVFIOState {
      * |             |
      * |    Temp     |
      * |             |
-     * |-------------|       <= QEMU_VFIO_IOVA_MAX
+     * |-------------|       <= max_iova
      * |xxxxxxxxxxxxx|
      * |xxxxxxxxxxxxx|
      * ---------------
      *
-     * - Addresses lower than QEMU_VFIO_IOVA_MIN are reserved as invalid;
+     * - Addresses lower than min_iova are reserved as invalid;
      *
      * - Fixed mappings of HVAs are assigned "low" IOVAs in the range of
-     *   [QEMU_VFIO_IOVA_MIN, low_water_mark).  Once allocated they will not be
+     *   [min_iova, low_water_mark).  Once allocated they will not be
      *   reclaimed - low_water_mark never shrinks;
      *
      * - IOVAs in range [low_water_mark, high_water_mark) are free;
      *
-     * - IOVAs in range [high_water_mark, QEMU_VFIO_IOVA_MAX) are volatile
+     * - IOVAs in range [high_water_mark, max_iova) are volatile
      *   mappings. At each qemu_vfio_dma_reset_temporary() call, the whole area
      *   is recycled. The caller should make sure I/O's depending on these
      *   mappings are completed before calling.
@@ -271,6 +273,8 @@ static void collect_usable_iova_ranges(QEMUVFIOState *s, void *first_cap)
         s->usable_iova_ranges[i].start = cap_iova_range->iova_ranges[i].start;
         s->usable_iova_ranges[i].end = cap_iova_range->iova_ranges[i].end;
     }
+    s->min_iova = s->usable_iova_ranges[0].start;
+    s->max_iova = s->usable_iova_ranges[i - 1].end + 1;
 }
 
 static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
@@ -362,12 +366,14 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
 
     /*
      * if the kernel does not report usable IOVA regions, choose
-     * the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX -1] region
+     * the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX - 1] region
      */
     s->nb_iova_ranges = 1;
     s->usable_iova_ranges = g_new0(struct IOVARange, 1);
     s->usable_iova_ranges[0].start = QEMU_VFIO_IOVA_MIN;
     s->usable_iova_ranges[0].end = QEMU_VFIO_IOVA_MAX - 1;
+    s->min_iova = QEMU_VFIO_IOVA_MIN;
+    s->max_iova = QEMU_VFIO_IOVA_MAX;
 
     if (iommu_info->argsz > iommu_info_size) {
         void *first_cap;
@@ -484,8 +490,8 @@ static void qemu_vfio_open_common(QEMUVFIOState *s)
     s->ram_notifier.ram_block_added = qemu_vfio_ram_block_added;
     s->ram_notifier.ram_block_removed = qemu_vfio_ram_block_removed;
     ram_block_notifier_add(&s->ram_notifier);
-    s->low_water_mark = QEMU_VFIO_IOVA_MIN;
-    s->high_water_mark = QEMU_VFIO_IOVA_MAX;
+    s->low_water_mark = s->min_iova;
+    s->high_water_mark = s->max_iova;
     qemu_ram_foreach_block(qemu_vfio_init_ramblock, s);
 }
 
@@ -734,7 +740,7 @@ int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s)
         .argsz = sizeof(unmap),
         .flags = 0,
         .iova = s->high_water_mark,
-        .size = QEMU_VFIO_IOVA_MAX - s->high_water_mark,
+        .size = s->max_iova - s->high_water_mark,
     };
     trace_qemu_vfio_dma_reset_temporary(s);
     QEMU_LOCK_GUARD(&s->lock);
@@ -742,7 +748,7 @@ int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s)
         error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
         return -errno;
     }
-    s->high_water_mark = QEMU_VFIO_IOVA_MAX;
+    s->high_water_mark = s->max_iova;
     return 0;
 }
 
-- 
2.21.3



  parent reply	other threads:[~2020-09-25 13:59 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-25 13:48 [RFC 0/3] NVMe passthrough: Take into account host IOVA reserved regions Eric Auger
2020-09-25 13:48 ` [RFC 1/3] util/vfio-helpers: Collect " Eric Auger
2020-09-25 14:43   ` Fam Zheng
2020-09-25 15:23     ` Auger Eric
2020-09-25 15:44       ` Fam Zheng
2020-09-25 15:53         ` Auger Eric
2020-09-25 13:48 ` Eric Auger [this message]
2020-09-25 13:48 ` [RFC 3/3] util/vfio-helpers: Rework the IOVA allocator to avoid " Eric Auger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200925134845.21053-3-eric.auger@redhat.com \
    --to=eric.auger@redhat.com \
    --cc=alex.williamson@redhat.com \
    --cc=cohuck@redhat.com \
    --cc=eric.auger.pro@gmail.com \
    --cc=fam@euphon.net \
    --cc=kwolf@redhat.com \
    --cc=lvivier@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=philmd@redhat.com \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).