From: Fam Zheng <fam@euphon.net>
To: Eric Auger <eric.auger@redhat.com>
Cc: lvivier@redhat.com, kwolf@redhat.com, cohuck@redhat.com,
qemu-devel@nongnu.org, mreitz@redhat.com,
alex.williamson@redhat.com, qemu-arm@nongnu.org,
stefanha@redhat.com, philmd@redhat.com, eric.auger.pro@gmail.com
Subject: Re: [RFC 1/3] util/vfio-helpers: Collect IOVA reserved regions
Date: Fri, 25 Sep 2020 15:43:07 +0100 [thread overview]
Message-ID: <20200925144307.GA3809989@dev> (raw)
In-Reply-To: <20200925134845.21053-2-eric.auger@redhat.com>
On 2020-09-25 15:48, Eric Auger wrote:
> The IOVA allocator currently ignores host reserved regions.
> As a result some chosen IOVAs may collide with some of them,
> resulting in VFIO MAP_DMA errors later on. This happens on ARM
> where the MSI reserved window quickly is encountered:
> [0x8000000, 0x8100000]. since 5.4 kernel, VFIO returns the usable
> IOVA regions. So let's enumerate them in the prospect to avoid
> them, later on.
>
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> ---
> util/vfio-helpers.c | 75 +++++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 73 insertions(+), 2 deletions(-)
>
> diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
> index 583bdfb36f..8e91beba95 100644
> --- a/util/vfio-helpers.c
> +++ b/util/vfio-helpers.c
> @@ -40,6 +40,11 @@ typedef struct {
> uint64_t iova;
> } IOVAMapping;
>
> +struct IOVARange {
> + uint64_t start;
> + uint64_t end;
> +};
> +
> struct QEMUVFIOState {
> QemuMutex lock;
>
> @@ -49,6 +54,8 @@ struct QEMUVFIOState {
> int device;
> RAMBlockNotifier ram_notifier;
> struct vfio_region_info config_region_info, bar_region_info[6];
> + struct IOVARange *usable_iova_ranges;
> + uint8_t nb_iova_ranges;
>
> /* These fields are protected by @lock */
> /* VFIO's IO virtual address space is managed by splitting into a few
> @@ -236,6 +243,36 @@ static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int
> return ret == size ? 0 : -errno;
> }
>
> +static void collect_usable_iova_ranges(QEMUVFIOState *s, void *first_cap)
> +{
> + struct vfio_iommu_type1_info_cap_iova_range *cap_iova_range;
> + struct vfio_info_cap_header *cap = first_cap;
> + void *offset = first_cap;
> + int i;
> +
> + while (cap->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
> + if (cap->next) {
This check looks reversed.
> + return;
> + }
> + offset += cap->next;
@offset is unused.
> + cap = (struct vfio_info_cap_header *)cap;
This assignment is nop.
> + }
> +
> + cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;
> +
> + s->nb_iova_ranges = cap_iova_range->nr_iovas;
> + if (s->nb_iova_ranges > 1) {
> + s->usable_iova_ranges =
> + g_realloc(s->usable_iova_ranges,
> + s->nb_iova_ranges * sizeof(struct IOVARange));
> + }
> +
> + for (i = 0; i < s->nb_iova_ranges; i++) {
s/ / /
> + s->usable_iova_ranges[i].start = cap_iova_range->iova_ranges[i].start;
> + s->usable_iova_ranges[i].end = cap_iova_range->iova_ranges[i].end;
> + }
> +}
> +
> static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
> Error **errp)
> {
> @@ -243,10 +280,13 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
> int i;
> uint16_t pci_cmd;
> struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
> - struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
> + struct vfio_iommu_type1_info *iommu_info = NULL;
> + size_t iommu_info_size = sizeof(*iommu_info);
> struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
> char *group_file = NULL;
>
> + s->usable_iova_ranges = NULL;
> +
> /* Create a new container */
> s->container = open("/dev/vfio/vfio", O_RDWR);
>
> @@ -310,13 +350,38 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
> goto fail;
> }
>
> + iommu_info = g_malloc0(iommu_info_size);
> + iommu_info->argsz = iommu_info_size;
> +
> /* Get additional IOMMU info */
> - if (ioctl(s->container, VFIO_IOMMU_GET_INFO, &iommu_info)) {
> + if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
> error_setg_errno(errp, errno, "Failed to get IOMMU info");
> ret = -errno;
> goto fail;
> }
>
> + /*
> + * if the kernel does not report usable IOVA regions, choose
> + * the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX -1] region
> + */
> + s->nb_iova_ranges = 1;
> + s->usable_iova_ranges = g_new0(struct IOVARange, 1);
> + s->usable_iova_ranges[0].start = QEMU_VFIO_IOVA_MIN;
> + s->usable_iova_ranges[0].end = QEMU_VFIO_IOVA_MAX - 1;
> +
> + if (iommu_info->argsz > iommu_info_size) {
> + void *first_cap;
> +
> + iommu_info_size = iommu_info->argsz;
> + iommu_info = g_realloc(iommu_info, iommu_info_size);
> + if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
> + ret = -errno;
> + goto fail;
> + }
> + first_cap = (void *)iommu_info + iommu_info->cap_offset;
> + collect_usable_iova_ranges(s, first_cap);
> + }
> +
> s->device = ioctl(s->group, VFIO_GROUP_GET_DEVICE_FD, device);
>
> if (s->device < 0) {
> @@ -365,8 +430,12 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
> if (ret) {
> goto fail;
> }
> + g_free(iommu_info);
> return 0;
> fail:
> + g_free(s->usable_iova_ranges);
Set s->usable_iova_ranges to NULL to avoid double free?
> + s->nb_iova_ranges = 0;
> + g_free(iommu_info);
> close(s->group);
> fail_container:
> close(s->container);
> @@ -716,6 +785,8 @@ void qemu_vfio_close(QEMUVFIOState *s)
> qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
> }
> ram_block_notifier_remove(&s->ram_notifier);
> + g_free(s->usable_iova_ranges);
> + s->nb_iova_ranges = 0;
> qemu_vfio_reset(s);
> close(s->device);
> close(s->group);
> --
> 2.21.3
>
>
Fam
next prev parent reply other threads:[~2020-09-25 14:47 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-25 13:48 [RFC 0/3] NVMe passthrough: Take into account host IOVA reserved regions Eric Auger
2020-09-25 13:48 ` [RFC 1/3] util/vfio-helpers: Collect " Eric Auger
2020-09-25 14:43 ` Fam Zheng [this message]
2020-09-25 15:23 ` Auger Eric
2020-09-25 15:44 ` Fam Zheng
2020-09-25 15:53 ` Auger Eric
2020-09-25 13:48 ` [RFC 2/3] util/vfio-helpers: Dynamically compute the min/max IOVA Eric Auger
2020-09-25 13:48 ` [RFC 3/3] util/vfio-helpers: Rework the IOVA allocator to avoid IOVA reserved regions Eric Auger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200925144307.GA3809989@dev \
--to=fam@euphon.net \
--cc=alex.williamson@redhat.com \
--cc=cohuck@redhat.com \
--cc=eric.auger.pro@gmail.com \
--cc=eric.auger@redhat.com \
--cc=kwolf@redhat.com \
--cc=lvivier@redhat.com \
--cc=mreitz@redhat.com \
--cc=philmd@redhat.com \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.