From: Stefan Hajnoczi <stefanha@redhat.com>
To: qemu-devel@nongnu.org, Peter Maydell <peter.maydell@linaro.org>
Cc: Kevin Wolf <kwolf@redhat.com>, Fam Zheng <fam@euphon.net>,
Eduardo Habkost <ehabkost@redhat.com>,
qemu-block@nongnu.org, Max Reitz <mreitz@redhat.com>,
Eric Auger <eric.auger@redhat.com>,
Stefan Hajnoczi <stefanha@redhat.com>,
Cleber Rosa <crosa@redhat.com>
Subject: [PULL v2 16/17] util/vfio-helpers: Collect IOVA reserved regions
Date: Mon, 5 Oct 2020 16:43:22 +0100 [thread overview]
Message-ID: <20201005154323.31347-17-stefanha@redhat.com> (raw)
In-Reply-To: <20201005154323.31347-1-stefanha@redhat.com>
From: Eric Auger <eric.auger@redhat.com>
The IOVA allocator currently ignores host reserved regions.
As a result some chosen IOVAs may collide with some of them,
resulting in VFIO MAP_DMA errors later on. This happens on ARM
where the MSI reserved window is quickly encountered:
[0x8000000, 0x8100000]. Since kernel 5.4, VFIO returns the usable
IOVA regions. So let's enumerate them, with a view to keeping the
allocator away from the reserved regions later on.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Message-id: 20200929085550.30926-2-eric.auger@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
util/vfio-helpers.c | 72 +++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 70 insertions(+), 2 deletions(-)
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 9ac307e3d4..fe9ca9ce38 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -40,6 +40,11 @@ typedef struct {
uint64_t iova;
} IOVAMapping;
+struct IOVARange {
+ uint64_t start;
+ uint64_t end;
+};
+
struct QEMUVFIOState {
QemuMutex lock;
@@ -49,6 +54,8 @@ struct QEMUVFIOState {
int device;
RAMBlockNotifier ram_notifier;
struct vfio_region_info config_region_info, bar_region_info[6];
+ struct IOVARange *usable_iova_ranges;
+ uint8_t nb_iova_ranges;
/* These fields are protected by @lock */
/* VFIO's IO virtual address space is managed by splitting into a few
@@ -236,6 +243,35 @@ static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int
return ret == size ? 0 : -errno;
}
+static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
+{
+ struct vfio_iommu_type1_info *info = (struct vfio_iommu_type1_info *)buf;
+ struct vfio_info_cap_header *cap = (void *)buf + info->cap_offset;
+ struct vfio_iommu_type1_info_cap_iova_range *cap_iova_range;
+ int i;
+
+ while (cap->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
+ if (!cap->next) {
+ return;
+ }
+ cap = (struct vfio_info_cap_header *)(buf + cap->next);
+ }
+
+ cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;
+
+ s->nb_iova_ranges = cap_iova_range->nr_iovas;
+ if (s->nb_iova_ranges > 1) {
+ s->usable_iova_ranges =
+ g_realloc(s->usable_iova_ranges,
+ s->nb_iova_ranges * sizeof(struct IOVARange));
+ }
+
+ for (i = 0; i < s->nb_iova_ranges; i++) {
+ s->usable_iova_ranges[i].start = cap_iova_range->iova_ranges[i].start;
+ s->usable_iova_ranges[i].end = cap_iova_range->iova_ranges[i].end;
+ }
+}
+
static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
Error **errp)
{
@@ -243,10 +279,13 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
int i;
uint16_t pci_cmd;
struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
- struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
+ struct vfio_iommu_type1_info *iommu_info = NULL;
+ size_t iommu_info_size = sizeof(*iommu_info);
struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
char *group_file = NULL;
+ s->usable_iova_ranges = NULL;
+
/* Create a new container */
s->container = open("/dev/vfio/vfio", O_RDWR);
@@ -310,13 +349,35 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
goto fail;
}
+ iommu_info = g_malloc0(iommu_info_size);
+ iommu_info->argsz = iommu_info_size;
+
/* Get additional IOMMU info */
- if (ioctl(s->container, VFIO_IOMMU_GET_INFO, &iommu_info)) {
+ if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
error_setg_errno(errp, errno, "Failed to get IOMMU info");
ret = -errno;
goto fail;
}
+ /*
+ * If the kernel does not report usable IOVA regions, choose
+ * the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX - 1] region.
+ */
+ s->nb_iova_ranges = 1;
+ s->usable_iova_ranges = g_new0(struct IOVARange, 1);
+ s->usable_iova_ranges[0].start = QEMU_VFIO_IOVA_MIN;
+ s->usable_iova_ranges[0].end = QEMU_VFIO_IOVA_MAX - 1;
+
+ if (iommu_info->argsz > iommu_info_size) {
+ iommu_info_size = iommu_info->argsz;
+ iommu_info = g_realloc(iommu_info, iommu_info_size);
+ if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
+ ret = -errno;
+ goto fail;
+ }
+ collect_usable_iova_ranges(s, iommu_info);
+ }
+
s->device = ioctl(s->group, VFIO_GROUP_GET_DEVICE_FD, device);
if (s->device < 0) {
@@ -365,8 +426,13 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
if (ret) {
goto fail;
}
+ g_free(iommu_info);
return 0;
fail:
+ g_free(s->usable_iova_ranges);
+ s->usable_iova_ranges = NULL;
+ s->nb_iova_ranges = 0;
+ g_free(iommu_info);
close(s->group);
fail_container:
close(s->container);
@@ -716,6 +782,8 @@ void qemu_vfio_close(QEMUVFIOState *s)
qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
}
ram_block_notifier_remove(&s->ram_notifier);
+ g_free(s->usable_iova_ranges);
+ s->nb_iova_ranges = 0;
qemu_vfio_reset(s);
close(s->device);
close(s->group);
--
2.26.2
next prev parent reply other threads:[~2020-10-05 16:09 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-05 15:43 [PULL v2 00/17] Block patches Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 01/17] util/vfio-helpers: Pass page protections to qemu_vfio_pci_map_bar() Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 02/17] block/nvme: Map doorbells pages write-only Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 03/17] block/nvme: Reduce I/O registers scope Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 04/17] block/nvme: Drop NVMeRegs structure, directly use NvmeBar Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 05/17] block/nvme: Use register definitions from 'block/nvme.h' Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 06/17] block/nvme: Replace magic value by SCALE_MS definition Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 07/17] block: return error-code from bdrv_invalidate_cache Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 08/17] block/io: refactor coroutine wrappers Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 09/17] block: declare some coroutine functions in block/coroutines.h Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 10/17] scripts: add block-coroutine-wrapper.py Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 11/17] block: generate coroutine-wrapper code Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 12/17] block: drop bdrv_prwv Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 13/17] block/io: refactor save/load vmstate Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 14/17] include/block/block.h: drop non-ascii quotation mark Stefan Hajnoczi
2020-10-05 15:43 ` [PULL v2 15/17] docs: add 'io_uring' option to 'aio' param in qemu-options.hx Stefan Hajnoczi
2020-10-05 15:43 ` Stefan Hajnoczi [this message]
2020-10-05 15:43 ` [PULL v2 17/17] util/vfio-helpers: Rework the IOVA allocator to avoid IOVA reserved regions Stefan Hajnoczi
2020-10-06 12:18 ` [PULL v2 00/17] Block patches Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201005154323.31347-17-stefanha@redhat.com \
--to=stefanha@redhat.com \
--cc=crosa@redhat.com \
--cc=ehabkost@redhat.com \
--cc=eric.auger@redhat.com \
--cc=fam@euphon.net \
--cc=kwolf@redhat.com \
--cc=mreitz@redhat.com \
--cc=peter.maydell@linaro.org \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).