qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Stefan Hajnoczi <stefanha@redhat.com>
To: qemu-devel@nongnu.org, Peter Maydell <peter.maydell@linaro.org>
Cc: Kevin Wolf <kwolf@redhat.com>, Fam Zheng <fam@euphon.net>,
	Eduardo Habkost <ehabkost@redhat.com>,
	qemu-block@nongnu.org, Max Reitz <mreitz@redhat.com>,
	Eric Auger <eric.auger@redhat.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	Cleber Rosa <crosa@redhat.com>
Subject: [PULL v2 16/17] util/vfio-helpers: Collect IOVA reserved regions
Date: Mon,  5 Oct 2020 16:43:22 +0100	[thread overview]
Message-ID: <20201005154323.31347-17-stefanha@redhat.com> (raw)
In-Reply-To: <20201005154323.31347-1-stefanha@redhat.com>

From: Eric Auger <eric.auger@redhat.com>

The IOVA allocator currently ignores host reserved regions.
As a result some chosen IOVAs may collide with some of them,
resulting in VFIO MAP_DMA errors later on. This happens on ARM
where the MSI reserved window quickly is encountered:
[0x8000000, 0x8100000]. since 5.4 kernel, VFIO returns the usable
IOVA regions. So let's enumerate them in the prospect to avoid
them, later on.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Message-id: 20200929085550.30926-2-eric.auger@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/vfio-helpers.c | 72 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 70 insertions(+), 2 deletions(-)

diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 9ac307e3d4..fe9ca9ce38 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -40,6 +40,11 @@ typedef struct {
     uint64_t iova;
 } IOVAMapping;
 
+struct IOVARange {
+    uint64_t start;
+    uint64_t end;
+};
+
 struct QEMUVFIOState {
     QemuMutex lock;
 
@@ -49,6 +54,8 @@ struct QEMUVFIOState {
     int device;
     RAMBlockNotifier ram_notifier;
     struct vfio_region_info config_region_info, bar_region_info[6];
+    struct IOVARange *usable_iova_ranges;
+    uint8_t nb_iova_ranges;
 
     /* These fields are protected by @lock */
     /* VFIO's IO virtual address space is managed by splitting into a few
@@ -236,6 +243,35 @@ static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int
     return ret == size ? 0 : -errno;
 }
 
+static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
+{
+    struct vfio_iommu_type1_info *info = (struct vfio_iommu_type1_info *)buf;
+    struct vfio_info_cap_header *cap = (void *)buf + info->cap_offset;
+    struct vfio_iommu_type1_info_cap_iova_range *cap_iova_range;
+    int i;
+
+    while (cap->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
+        if (!cap->next) {
+            return;
+        }
+        cap = (struct vfio_info_cap_header *)(buf + cap->next);
+    }
+
+    cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;
+
+    s->nb_iova_ranges = cap_iova_range->nr_iovas;
+    if (s->nb_iova_ranges > 1) {
+        s->usable_iova_ranges =
+            g_realloc(s->usable_iova_ranges,
+                      s->nb_iova_ranges * sizeof(struct IOVARange));
+    }
+
+    for (i = 0; i < s->nb_iova_ranges; i++) {
+        s->usable_iova_ranges[i].start = cap_iova_range->iova_ranges[i].start;
+        s->usable_iova_ranges[i].end = cap_iova_range->iova_ranges[i].end;
+    }
+}
+
 static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
                               Error **errp)
 {
@@ -243,10 +279,13 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
     int i;
     uint16_t pci_cmd;
     struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
-    struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
+    struct vfio_iommu_type1_info *iommu_info = NULL;
+    size_t iommu_info_size = sizeof(*iommu_info);
     struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
     char *group_file = NULL;
 
+    s->usable_iova_ranges = NULL;
+
     /* Create a new container */
     s->container = open("/dev/vfio/vfio", O_RDWR);
 
@@ -310,13 +349,35 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
         goto fail;
     }
 
+    iommu_info = g_malloc0(iommu_info_size);
+    iommu_info->argsz = iommu_info_size;
+
     /* Get additional IOMMU info */
-    if (ioctl(s->container, VFIO_IOMMU_GET_INFO, &iommu_info)) {
+    if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
         error_setg_errno(errp, errno, "Failed to get IOMMU info");
         ret = -errno;
         goto fail;
     }
 
+    /*
+     * if the kernel does not report usable IOVA regions, choose
+     * the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX -1] region
+     */
+    s->nb_iova_ranges = 1;
+    s->usable_iova_ranges = g_new0(struct IOVARange, 1);
+    s->usable_iova_ranges[0].start = QEMU_VFIO_IOVA_MIN;
+    s->usable_iova_ranges[0].end = QEMU_VFIO_IOVA_MAX - 1;
+
+    if (iommu_info->argsz > iommu_info_size) {
+        iommu_info_size = iommu_info->argsz;
+        iommu_info = g_realloc(iommu_info, iommu_info_size);
+        if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
+            ret = -errno;
+            goto fail;
+        }
+        collect_usable_iova_ranges(s, iommu_info);
+    }
+
     s->device = ioctl(s->group, VFIO_GROUP_GET_DEVICE_FD, device);
 
     if (s->device < 0) {
@@ -365,8 +426,13 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
     if (ret) {
         goto fail;
     }
+    g_free(iommu_info);
     return 0;
 fail:
+    g_free(s->usable_iova_ranges);
+    s->usable_iova_ranges = NULL;
+    s->nb_iova_ranges = 0;
+    g_free(iommu_info);
     close(s->group);
 fail_container:
     close(s->container);
@@ -716,6 +782,8 @@ void qemu_vfio_close(QEMUVFIOState *s)
         qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
     }
     ram_block_notifier_remove(&s->ram_notifier);
+    g_free(s->usable_iova_ranges);
+    s->nb_iova_ranges = 0;
     qemu_vfio_reset(s);
     close(s->device);
     close(s->group);
-- 
2.26.2


  parent reply	other threads:[~2020-10-05 16:09 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 01/17] util/vfio-helpers: Pass page protections to qemu_vfio_pci_map_bar() Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 02/17] block/nvme: Map doorbells pages write-only Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 03/17] block/nvme: Reduce I/O registers scope Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 04/17] block/nvme: Drop NVMeRegs structure, directly use NvmeBar Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 05/17] block/nvme: Use register definitions from 'block/nvme.h' Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 06/17] block/nvme: Replace magic value by SCALE_MS definition Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 07/17] block: return error-code from bdrv_invalidate_cache Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 08/17] block/io: refactor coroutine wrappers Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 09/17] block: declare some coroutine functions in block/coroutines.h Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 10/17] scripts: add block-coroutine-wrapper.py Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 11/17] block: generate coroutine-wrapper code Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 12/17] block: drop bdrv_prwv Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 13/17] block/io: refactor save/load vmstate Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 14/17] include/block/block.h: drop non-ascii quotation mark Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 15/17] docs: add 'io_uring' option to 'aio' param in qemu-options.hx Stefan Hajnoczi
2020-10-05 15:43 ` Stefan Hajnoczi [this message]
2020-10-05 15:43 ` [PULL v2 17/17] util/vfio-helpers: Rework the IOVA allocator to avoid IOVA reserved regions Stefan Hajnoczi
2020-10-06 12:18 ` [PULL v2 00/17] Block patches Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201005154323.31347-17-stefanha@redhat.com \
    --to=stefanha@redhat.com \
    --cc=crosa@redhat.com \
    --cc=ehabkost@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=fam@euphon.net \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).