From: Auger Eric <eric.auger@redhat.com>
To: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>,
iommu@lists.linux-foundation.org, devel@acpica.org,
linux-acpi@vger.kernel.org, kvm@vger.kernel.org,
kvmarm@lists.cs.columbia.edu,
virtualization@lists.linux-foundation.org,
virtio-dev@lists.oasis-open.org
Cc: jasowang@redhat.com, mst@redhat.com, lv.zheng@intel.com,
robert.moore@intel.com, joro@8bytes.org,
alex.williamson@redhat.com, sudeep.holla@arm.com,
hanjun.guo@linaro.org, lorenzo.pieralisi@arm.com,
lenb@kernel.org, rjw@rjwysocki.net, marc.zyngier@arm.com,
robin.murphy@arm.com, will.deacon@arm.com,
bharat.bhushan@nxp.com, Jayachandran.Nair@cavium.com,
ashok.raj@intel.com, peterx@redhat.com
Subject: Re: [RFC PATCH v2 1/5] iommu: Add virtio-iommu driver
Date: Mon, 15 Jan 2018 16:12:44 +0100
Message-ID: <54320f26-05a5-9a09-f6cf-ae610c41e077@redhat.com>
In-Reply-To: <20171117185211.32593-2-jean-philippe.brucker@arm.com>
Hi Jean-Philippe,
please find some comments below.
On 17/11/17 19:52, Jean-Philippe Brucker wrote:
> The virtio IOMMU is a para-virtualized device, allowing to send IOMMU
> requests such as map/unmap over virtio-mmio transport without emulating
> page tables. This implementation handle ATTACH, DETACH, MAP and UNMAP
s/handle/handles/
> requests.
>
> The bulk of the code is to create requests and send them through virtio.
> Implementing the IOMMU API is fairly straightforward since the
> virtio-iommu MAP/UNMAP interface is almost identical.
>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
> ---
> drivers/iommu/Kconfig | 11 +
> drivers/iommu/Makefile | 1 +
> drivers/iommu/virtio-iommu.c | 958 ++++++++++++++++++++++++++++++++++++++
> include/uapi/linux/virtio_ids.h | 1 +
> include/uapi/linux/virtio_iommu.h | 140 ++++++
> 5 files changed, 1111 insertions(+)
> create mode 100644 drivers/iommu/virtio-iommu.c
> create mode 100644 include/uapi/linux/virtio_iommu.h
>
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index 17b212f56e6a..7271e59e8b23 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -403,4 +403,15 @@ config QCOM_IOMMU
> help
> Support for IOMMU on certain Qualcomm SoCs.
>
> +config VIRTIO_IOMMU
> + bool "Virtio IOMMU driver"
> + depends on VIRTIO_MMIO
> + select IOMMU_API
> + select INTERVAL_TREE
> + select ARM_DMA_USE_IOMMU if ARM
> + help
> + Para-virtualised IOMMU driver with virtio.
> +
> + Say Y here if you intend to run this kernel as a guest.
> +
> endif # IOMMU_SUPPORT
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index dca71fe1c885..432242f3a328 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -31,3 +31,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
> obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
> obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
> obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
> +obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
> diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
> new file mode 100644
> index 000000000000..feb8c8925c3a
> --- /dev/null
> +++ b/drivers/iommu/virtio-iommu.c
> @@ -0,0 +1,958 @@
> +/*
> + * Virtio driver for the paravirtualized IOMMU
> + *
> + * Copyright (C) 2017 ARM Limited
> + * Author: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
> + *
> + * SPDX-License-Identifier: GPL-2.0
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/amba/bus.h>
> +#include <linux/delay.h>
> +#include <linux/dma-iommu.h>
> +#include <linux/freezer.h>
> +#include <linux/interval_tree.h>
> +#include <linux/iommu.h>
> +#include <linux/module.h>
> +#include <linux/of_iommu.h>
> +#include <linux/of_platform.h>
> +#include <linux/pci.h>
> +#include <linux/platform_device.h>
> +#include <linux/virtio.h>
> +#include <linux/virtio_config.h>
> +#include <linux/virtio_ids.h>
> +#include <linux/wait.h>
> +
> +#include <uapi/linux/virtio_iommu.h>
> +
> +#define MSI_IOVA_BASE 0x8000000
> +#define MSI_IOVA_LENGTH 0x100000
> +
> +struct viommu_dev {
> + struct iommu_device iommu;
> + struct device *dev;
> + struct virtio_device *vdev;
> +
> + struct ida domain_ids;
> +
> + struct virtqueue *vq;
> + /* Serialize anything touching the request queue */
> + spinlock_t request_lock;
> +
> + /* Device configuration */
> + struct iommu_domain_geometry geometry;
> + u64 pgsize_bitmap;
> + u8 domain_bits;
> +};
> +
> +struct viommu_mapping {
> + phys_addr_t paddr;
> + struct interval_tree_node iova;
> + union {
> + struct virtio_iommu_req_map map;
> + struct virtio_iommu_req_unmap unmap;
> + } req;
> +};
> +
> +struct viommu_domain {
> + struct iommu_domain domain;
> + struct viommu_dev *viommu;
> + struct mutex mutex;
> + unsigned int id;
> +
> + spinlock_t mappings_lock;
> + struct rb_root_cached mappings;
> +
> + /* Number of endpoints attached to this domain */
> + refcount_t endpoints;
> +};
> +
> +struct viommu_endpoint {
> + struct viommu_dev *viommu;
> + struct viommu_domain *vdomain;
> +};
> +
> +struct viommu_request {
> + struct scatterlist top;
> + struct scatterlist bottom;
> +
> + int written;
> + struct list_head list;
> +};
> +
> +#define to_viommu_domain(domain) container_of(domain, struct viommu_domain, domain)
> +
> +/* Virtio transport */
> +
> +static int viommu_status_to_errno(u8 status)
> +{
> + switch (status) {
> + case VIRTIO_IOMMU_S_OK:
> + return 0;
> + case VIRTIO_IOMMU_S_UNSUPP:
> + return -ENOSYS;
> + case VIRTIO_IOMMU_S_INVAL:
> + return -EINVAL;
> + case VIRTIO_IOMMU_S_RANGE:
> + return -ERANGE;
> + case VIRTIO_IOMMU_S_NOENT:
> + return -ENOENT;
> + case VIRTIO_IOMMU_S_FAULT:
> + return -EFAULT;
> + case VIRTIO_IOMMU_S_IOERR:
> + case VIRTIO_IOMMU_S_DEVERR:
> + default:
> + return -EIO;
> + }
> +}
> +
> +/*
> + * viommu_get_req_size - compute request size
> + *
> + * A virtio-iommu request is split into one device-read-only part (top) and one
> + * device-write-only part (bottom). Given a request, return the sizes of the two
> + * parts in @top and @bottom.
This holds for all requests but virtio_iommu_req_probe, which has a
variable bottom size.
> + *
> + * Return 0 on success, or an error when the request seems invalid.
> + */
> +static int viommu_get_req_size(struct viommu_dev *viommu,
> + struct virtio_iommu_req_head *req, size_t *top,
> + size_t *bottom)
> +{
> + size_t size;
> + union virtio_iommu_req *r = (void *)req;
> +
> + *bottom = sizeof(struct virtio_iommu_req_tail);
> +
> + switch (req->type) {
> + case VIRTIO_IOMMU_T_ATTACH:
> + size = sizeof(r->attach);
> + break;
> + case VIRTIO_IOMMU_T_DETACH:
> + size = sizeof(r->detach);
> + break;
> + case VIRTIO_IOMMU_T_MAP:
> + size = sizeof(r->map);
> + break;
> + case VIRTIO_IOMMU_T_UNMAP:
> + size = sizeof(r->unmap);
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + *top = size - *bottom;
> + return 0;
> +}
> +
> +static int viommu_receive_resp(struct viommu_dev *viommu, int nr_sent,
> + struct list_head *sent)
> +{
> +
> + unsigned int len;
> + int nr_received = 0;
> + struct viommu_request *req, *pending;
> +
> + pending = list_first_entry_or_null(sent, struct viommu_request, list);
> + if (WARN_ON(!pending))
> + return 0;
> +
> + while ((req = virtqueue_get_buf(viommu->vq, &len)) != NULL) {
> + if (req != pending) {
> + dev_warn(viommu->dev, "discarding stale request\n");
> + continue;
> + }
> +
> + pending->written = len;
> +
> + if (++nr_received == nr_sent) {
> + WARN_ON(!list_is_last(&pending->list, sent));
> + break;
> + } else if (WARN_ON(list_is_last(&pending->list, sent))) {
> + break;
> + }
> +
> + pending = list_next_entry(pending, list);
> + }
> +
> + return nr_received;
> +}
> +
> +/* Must be called with request_lock held */
> +static int _viommu_send_reqs_sync(struct viommu_dev *viommu,
> + struct viommu_request *req, int nr,
> + int *nr_sent)
> +{
> + int i, ret;
> + ktime_t timeout;
> + LIST_HEAD(pending);
> + int nr_received = 0;
> + struct scatterlist *sg[2];
> + /*
> + * Yes, 1s timeout. As a guest, we don't necessarily have a precise
> + * notion of time and this just prevents locking up a CPU if the device
> + * dies.
> + */
> + unsigned long timeout_ms = 1000;
> +
> + *nr_sent = 0;
> +
> + for (i = 0; i < nr; i++, req++) {
> + req->written = 0;
> +
> + sg[0] = &req->top;
> + sg[1] = &req->bottom;
> +
> + ret = virtqueue_add_sgs(viommu->vq, sg, 1, 1, req,
> + GFP_ATOMIC);
> + if (ret)
> + break;
> +
> + list_add_tail(&req->list, &pending);
> + }
> +
> + if (i && !virtqueue_kick(viommu->vq))
> + return -EPIPE;
> +
> + timeout = ktime_add_ms(ktime_get(), timeout_ms * i);
I don't really understand how you choose your timeout value: 1s per sent
request.
> + while (nr_received < i && ktime_before(ktime_get(), timeout)) {
> + nr_received += viommu_receive_resp(viommu, i - nr_received,
> + &pending);
> + if (nr_received < i) {
> + /*
> + * FIXME: what's a good way to yield to host? A second
> + * virtqueue_kick won't have any effect since we haven't
> + * added any descriptor.
> + */
> + udelay(10);
Could you explain why udelay is used here?
> + }
> + }
> +
> + if (nr_received != i)
> + ret = -ETIMEDOUT;
> +
> + if (ret == -ENOSPC && nr_received)
> + /*
> + * We've freed some space since virtio told us that the ring is
> + * full, tell the caller to come back for more.
> + */
> + ret = -EAGAIN;
> +
> + *nr_sent = nr_received;
> +
> + return ret;
> +}
> +
> +/*
> + * viommu_send_reqs_sync - add a batch of requests, kick the host and wait for
> + * them to return
> + *
> + * @req: array of requests
> + * @nr: array length
> + * @nr_sent: on return, contains the number of requests actually sent
> + *
> + * Return 0 on success, or an error if we failed to send some of the requests.
> + */
> +static int viommu_send_reqs_sync(struct viommu_dev *viommu,
> + struct viommu_request *req, int nr,
> + int *nr_sent)
> +{
> + int ret;
> + int sent = 0;
> + unsigned long flags;
> +
> + *nr_sent = 0;
> + do {
> + spin_lock_irqsave(&viommu->request_lock, flags);
> + ret = _viommu_send_reqs_sync(viommu, req, nr, &sent);
> + spin_unlock_irqrestore(&viommu->request_lock, flags);
> +
> + *nr_sent += sent;
> + req += sent;
> + nr -= sent;
> + } while (ret == -EAGAIN);
> +
> + return ret;
> +}
> +
> +/*
> + * viommu_send_req_sync - send one request and wait for reply
> + *
> + * @top: pointer to a virtio_iommu_req_* structure
> + *
> + * Returns 0 if the request was successful, or an error number otherwise. No
> + * distinction is done between transport and request errors.
> + */
> +static int viommu_send_req_sync(struct viommu_dev *viommu, void *top)
> +{
> + int ret;
> + int nr_sent;
> + void *bottom;
> + struct viommu_request req = {0};
This triggers a compiler warning:

drivers/iommu/virtio-iommu.c:326:9: warning: missing braces around
initializer [-Wmissing-braces]
drivers/iommu/virtio-iommu.c:326:9: note: (near initialization for
‘req.top’)
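One way to silence it might be the empty-initializer idiom instead of
{0} (just a sketch):

	struct viommu_request req = {};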
> + size_t top_size, bottom_size;
> + struct virtio_iommu_req_tail *tail;
> + struct virtio_iommu_req_head *head = top;
> +
> + ret = viommu_get_req_size(viommu, head, &top_size, &bottom_size);
> + if (ret)
> + return ret;
> +
> + bottom = top + top_size;
> + tail = bottom + bottom_size - sizeof(*tail);
> +
> + sg_init_one(&req.top, top, top_size);
> + sg_init_one(&req.bottom, bottom, bottom_size);
> +
> + ret = viommu_send_reqs_sync(viommu, &req, 1, &nr_sent);
> + if (ret || !req.written || nr_sent != 1) {
> + dev_err(viommu->dev, "failed to send request\n");
> + return -EIO;
> + }
> +
> + return viommu_status_to_errno(tail->status);
> +}
> +
> +/*
> + * viommu_add_mapping - add a mapping to the internal tree
> + *
> + * On success, return the new mapping. Otherwise return NULL.
> + */
> +static struct viommu_mapping *
> +viommu_add_mapping(struct viommu_domain *vdomain, unsigned long iova,
> + phys_addr_t paddr, size_t size)
> +{
> + unsigned long flags;
> + struct viommu_mapping *mapping;
> +
> + mapping = kzalloc(sizeof(*mapping), GFP_ATOMIC);
> + if (!mapping)
> + return NULL;
> +
> + mapping->paddr = paddr;
> + mapping->iova.start = iova;
> + mapping->iova.last = iova + size - 1;
> +
> + spin_lock_irqsave(&vdomain->mappings_lock, flags);
> + interval_tree_insert(&mapping->iova, &vdomain->mappings);
> + spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
> +
> + return mapping;
> +}
> +
> +/*
> + * viommu_del_mappings - remove mappings from the internal tree
> + *
> + * @vdomain: the domain
> + * @iova: start of the range
> + * @size: size of the range. A size of 0 corresponds to the entire address
> + * space.
> + * @out_mapping: if not NULL, the first removed mapping is returned in there.
> + * This allows the caller to reuse the buffer for the unmap request. Caller
> + * must always free the returned mapping, whether the function succeeds or
> + * not.
Only if unmapped > 0, I guess?
> + *
> + * On success, returns the number of unmapped bytes (>= size)
> + */
> +static size_t viommu_del_mappings(struct viommu_domain *vdomain,
> + unsigned long iova, size_t size,
> + struct viommu_mapping **out_mapping)
> +{
> + size_t unmapped = 0;
> + unsigned long flags;
> + unsigned long last = iova + size - 1;
> + struct viommu_mapping *mapping = NULL;
> + struct interval_tree_node *node, *next;
> +
> + spin_lock_irqsave(&vdomain->mappings_lock, flags);
> + next = interval_tree_iter_first(&vdomain->mappings, iova, last);
> +
> + if (next) {
> + mapping = container_of(next, struct viommu_mapping, iova);
> + /* Trying to split a mapping? */
> + if (WARN_ON(mapping->iova.start < iova))
> + next = NULL;
> + }
> +
> + while (next) {
> + node = next;
> + mapping = container_of(node, struct viommu_mapping, iova);
> +
> + next = interval_tree_iter_next(node, iova, last);
> +
> + /*
> + * Note that for a partial range, this will return the full
> + * mapping so we avoid sending split requests to the device.
> + */
> + unmapped += mapping->iova.last - mapping->iova.start + 1;
> +
> + interval_tree_remove(node, &vdomain->mappings);
> +
> + if (out_mapping && !(*out_mapping))
> + *out_mapping = mapping;
> + else
> + kfree(mapping);
> + }
> + spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
> +
> + return unmapped;
> +}
> +
> +/*
> + * viommu_replay_mappings - re-send MAP requests
> + *
> + * When reattaching a domain that was previously detached from all devices,
> + * mappings were deleted from the device. Re-create the mappings available in
> + * the internal tree.
> + *
> + * Caller should hold the mapping lock if necessary.
The only caller does not hold the lock. At this point we are attaching
our first endpoint to the domain. I think it would be worth a comment
in the caller.
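Something along these lines at the call site in viommu_attach_dev
(just a sketch, the wording is up to you):

	/*
	 * No lock needed: this is the first endpoint attached to the
	 * domain, so nobody else can touch the mapping tree yet.
	 */
	ret = viommu_replay_mappings(vdomain);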
> + */
> +static int viommu_replay_mappings(struct viommu_domain *vdomain)
> +{
> + int i = 1, ret, nr_sent;
> + struct viommu_request *reqs;
> + struct viommu_mapping *mapping;
> + struct interval_tree_node *node;
> + size_t top_size, bottom_size;
> +
> + node = interval_tree_iter_first(&vdomain->mappings, 0, -1UL);
> + if (!node)
> + return 0;
> +
> + while ((node = interval_tree_iter_next(node, 0, -1UL)) != NULL)
> + i++;
> +
> + reqs = kcalloc(i, sizeof(*reqs), GFP_KERNEL);
> + if (!reqs)
> + return -ENOMEM;
> +
> + bottom_size = sizeof(struct virtio_iommu_req_tail);
> + top_size = sizeof(struct virtio_iommu_req_map) - bottom_size;
> +
> + i = 0;
> + node = interval_tree_iter_first(&vdomain->mappings, 0, -1UL);
> + while (node) {
> + mapping = container_of(node, struct viommu_mapping, iova);
> + sg_init_one(&reqs[i].top, &mapping->req.map, top_size);
> + sg_init_one(&reqs[i].bottom, &mapping->req.map.tail,
> + bottom_size);
> +
> + node = interval_tree_iter_next(node, 0, -1UL);
> + i++;
> + }
> +
> + ret = viommu_send_reqs_sync(vdomain->viommu, reqs, i, &nr_sent);
> + kfree(reqs);
> +
> + return ret;
> +}
> +
> +/* IOMMU API */
> +
> +static bool viommu_capable(enum iommu_cap cap)
> +{
> + return false; /* :( */
> +}
> +
> +static struct iommu_domain *viommu_domain_alloc(unsigned type)
> +{
> + struct viommu_domain *vdomain;
> +
> + if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
> + return NULL;
> +
> + vdomain = kzalloc(sizeof(*vdomain), GFP_KERNEL);
> + if (!vdomain)
> + return NULL;
> +
> + mutex_init(&vdomain->mutex);
> + spin_lock_init(&vdomain->mappings_lock);
> + vdomain->mappings = RB_ROOT_CACHED;
> + refcount_set(&vdomain->endpoints, 0);
> +
> + if (type == IOMMU_DOMAIN_DMA &&
> + iommu_get_dma_cookie(&vdomain->domain)) {
> + kfree(vdomain);
> + return NULL;
> + }
> +
> + return &vdomain->domain;
> +}
> +
> +static int viommu_domain_finalise(struct viommu_dev *viommu,
> + struct iommu_domain *domain)
> +{
> + int ret;
> + struct viommu_domain *vdomain = to_viommu_domain(domain);
> + /* ida limits size to 31 bits. A value of 0 means "max" */
> + unsigned int max_domain = viommu->domain_bits >= 31 ? 0 :
> + 1U << viommu->domain_bits;
> +
> + vdomain->viommu = viommu;
> +
> + domain->pgsize_bitmap = viommu->pgsize_bitmap;
> + domain->geometry = viommu->geometry;
> +
> + ret = ida_simple_get(&viommu->domain_ids, 0, max_domain, GFP_KERNEL);
> + if (ret >= 0)
> + vdomain->id = (unsigned int)ret;
> +
> + return ret > 0 ? 0 : ret;
> +}
> +
> +static void viommu_domain_free(struct iommu_domain *domain)
> +{
> + struct viommu_domain *vdomain = to_viommu_domain(domain);
> +
> + iommu_put_dma_cookie(domain);
> +
> + /* Free all remaining mappings (size 2^64) */
> + viommu_del_mappings(vdomain, 0, 0, NULL);
> +
> + if (vdomain->viommu)
> + ida_simple_remove(&vdomain->viommu->domain_ids, vdomain->id);
> +
> + kfree(vdomain);
> +}
> +
> +static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
> +{
> + int i;
> + int ret = 0;
> + struct virtio_iommu_req_attach *req;
> + struct iommu_fwspec *fwspec = dev->iommu_fwspec;
> + struct viommu_endpoint *vdev = fwspec->iommu_priv;
> + struct viommu_domain *vdomain = to_viommu_domain(domain);
> +
> + mutex_lock(&vdomain->mutex);
> + if (!vdomain->viommu) {
> + /*
> + * Initialize the domain proper now that we know which viommu
> + * owns it.
> + */
> + ret = viommu_domain_finalise(vdev->viommu, domain);
> + } else if (vdomain->viommu != vdev->viommu) {
> + dev_err(dev, "cannot attach to foreign vIOMMU\n");
> + ret = -EXDEV;
> + }
> + mutex_unlock(&vdomain->mutex);
> +
> + if (ret)
> + return ret;
> +
> + /*
> + * When attaching the device to a new domain, it will be detached from
> + * the old one and, if as as a result the old domain isn't attached to
s/as as a/as a/
> + * any device, all mappings are removed from the old domain and it is
> + * freed. (Note that we can't use get_domain_for_dev here, it returns
> + * the default domain during initial attach.)
I don't see where the old domain is freed. I only see you decrement the
endpoints refcount. Also, if you replay the mappings later, I guess
they were not actually destroyed?
> + *
> + * Take note of the device disappearing, so we can ignore unmap request
> + * on stale domains (that is, between this detach and the upcoming
> + * free.)
> + *
> + * vdev->vdomain is protected by group->mutex
> + */
> + if (vdev->vdomain)
> + refcount_dec(&vdev->vdomain->endpoints);
> +
> + /* DMA to the stack is forbidden, store request on the heap */
> + req = kzalloc(sizeof(*req), GFP_KERNEL);
> + if (!req)
> + return -ENOMEM;
> +
> + *req = (struct virtio_iommu_req_attach) {
> + .head.type = VIRTIO_IOMMU_T_ATTACH,
> + .domain = cpu_to_le32(vdomain->id),
> + };
> +
> + for (i = 0; i < fwspec->num_ids; i++) {
> + req->endpoint = cpu_to_le32(fwspec->ids[i]);
> +
> + ret = viommu_send_req_sync(vdomain->viommu, req);
> + if (ret)
> + break;
> + }
> +
> + kfree(req);
> +
> + if (ret)
> + return ret;
> +
> + if (!refcount_read(&vdomain->endpoints)) {
> + /*
> + * This endpoint is the first to be attached to the domain.
> + * Replay existing mappings if any.
> + */
> + ret = viommu_replay_mappings(vdomain);
> + if (ret)
> + return ret;
> + }
> +
> + refcount_inc(&vdomain->endpoints);
This does not work for me as the refcount is initialized to 0 and
refcount_inc() does not work on a counter that is 0: it emits a WARN_ON
and the counter stays at 0. I worked around the issue by explicitly
calling refcount_set(&vdomain->endpoints, 1) when it was 0, and
refcount_inc() otherwise.
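I.e. something like the following sketch of my workaround:

	if (!refcount_read(&vdomain->endpoints))
		refcount_set(&vdomain->endpoints, 1);
	else
		refcount_inc(&vdomain->endpoints);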
> + vdev->vdomain = vdomain;
> +
> + return 0;
> +}
> +
> +static int viommu_map(struct iommu_domain *domain, unsigned long iova,
> + phys_addr_t paddr, size_t size, int prot)
> +{
> + int ret;
> + int flags;
> + struct viommu_mapping *mapping;
> + struct viommu_domain *vdomain = to_viommu_domain(domain);
> +
> + mapping = viommu_add_mapping(vdomain, iova, paddr, size);
> + if (!mapping)
> + return -ENOMEM;
> +
> + flags = (prot & IOMMU_READ ? VIRTIO_IOMMU_MAP_F_READ : 0) |
> + (prot & IOMMU_WRITE ? VIRTIO_IOMMU_MAP_F_WRITE : 0);
> +
> + mapping->req.map = (struct virtio_iommu_req_map) {
> + .head.type = VIRTIO_IOMMU_T_MAP,
> + .domain = cpu_to_le32(vdomain->id),
> + .virt_addr = cpu_to_le64(iova),
> + .phys_addr = cpu_to_le64(paddr),
> + .size = cpu_to_le64(size),
> + .flags = cpu_to_le32(flags),
> + };
> +
> + if (!refcount_read(&vdomain->endpoints))
> + return 0;
> +
> + ret = viommu_send_req_sync(vdomain->viommu, &mapping->req);
> + if (ret)
> + viommu_del_mappings(vdomain, iova, size, NULL);
> +
> + return ret;
> +}
> +
> +static size_t viommu_unmap(struct iommu_domain *domain, unsigned long iova,
> + size_t size)
> +{
> + int ret = 0;
> + size_t unmapped;
> + struct viommu_mapping *mapping = NULL;
> + struct viommu_domain *vdomain = to_viommu_domain(domain);
> +
> + unmapped = viommu_del_mappings(vdomain, iova, size, &mapping);
> + if (unmapped < size) {
> + ret = -EINVAL;
> + goto out_free;
> + }
> +
> + /* Device already removed all mappings after detach. */
> + if (!refcount_read(&vdomain->endpoints))
> + goto out_free;
> +
> + if (WARN_ON(!mapping))
> + return 0;
> +
> + mapping->req.unmap = (struct virtio_iommu_req_unmap) {
> + .head.type = VIRTIO_IOMMU_T_UNMAP,
> + .domain = cpu_to_le32(vdomain->id),
> + .virt_addr = cpu_to_le64(iova),
> + .size = cpu_to_le64(unmapped),
> + };
> +
> + ret = viommu_send_req_sync(vdomain->viommu, &mapping->req);
> +
> +out_free:
> + if (mapping)
> + kfree(mapping);
> +
> + return ret ? 0 : unmapped;
> +}
> +
> +static phys_addr_t viommu_iova_to_phys(struct iommu_domain *domain,
> + dma_addr_t iova)
> +{
> + u64 paddr = 0;
> + unsigned long flags;
> + struct viommu_mapping *mapping;
> + struct interval_tree_node *node;
> + struct viommu_domain *vdomain = to_viommu_domain(domain);
> +
> + spin_lock_irqsave(&vdomain->mappings_lock, flags);
> + node = interval_tree_iter_first(&vdomain->mappings, iova, iova);
> + if (node) {
> + mapping = container_of(node, struct viommu_mapping, iova);
> + paddr = mapping->paddr + (iova - mapping->iova.start);
> + }
> + spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
> +
> + return paddr;
> +}
> +
> +static struct iommu_ops viommu_ops;
> +static struct virtio_driver virtio_iommu_drv;
> +
> +static int viommu_match_node(struct device *dev, void *data)
> +{
> + return dev->parent->fwnode == data;
> +}
> +
> +static struct viommu_dev *viommu_get_by_fwnode(struct fwnode_handle *fwnode)
> +{
> + struct device *dev = driver_find_device(&virtio_iommu_drv.driver, NULL,
> + fwnode, viommu_match_node);
> + put_device(dev);
> +
> + return dev ? dev_to_virtio(dev)->priv : NULL;
> +}
> +
> +static int viommu_add_device(struct device *dev)
> +{
> + struct iommu_group *group;
> + struct viommu_endpoint *vdev;
> + struct viommu_dev *viommu = NULL;
> + struct iommu_fwspec *fwspec = dev->iommu_fwspec;
> +
> + if (!fwspec || fwspec->ops != &viommu_ops)
> + return -ENODEV;
> +
> + viommu = viommu_get_by_fwnode(fwspec->iommu_fwnode);
> + if (!viommu)
> + return -ENODEV;
> +
> + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
> + if (!vdev)
> + return -ENOMEM;
> +
> + vdev->viommu = viommu;
> + fwspec->iommu_priv = vdev;
> +
> + /*
> + * Last step creates a default domain and attaches to it. Everything
> + * must be ready.
> + */
> + group = iommu_group_get_for_dev(dev);
> + if (!IS_ERR(group))
> + iommu_group_put(group);
> +
> + return PTR_ERR_OR_ZERO(group);
> +}
> +
> +static void viommu_remove_device(struct device *dev)
> +{
> + kfree(dev->iommu_fwspec->iommu_priv);
> +}
> +
> +static struct iommu_group *viommu_device_group(struct device *dev)
> +{
> + if (dev_is_pci(dev))
> + return pci_device_group(dev);
> + else
> + return generic_device_group(dev);
> +}
> +
> +static int viommu_of_xlate(struct device *dev, struct of_phandle_args *args)
> +{
> + return iommu_fwspec_add_ids(dev, args->args, 1);
> +}
> +
> +static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
> +{
> + struct iommu_resv_region *region;
> + int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
> +
> + region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, prot,
> + IOMMU_RESV_SW_MSI);
> + if (!region)
> + return;
> +
> + list_add_tail(&region->list, head);
> +}
> +
> +static void viommu_put_resv_regions(struct device *dev, struct list_head *head)
> +{
> + struct iommu_resv_region *entry, *next;
> +
> + list_for_each_entry_safe(entry, next, head, list)
> + kfree(entry);
> +}
> +
> +static struct iommu_ops viommu_ops = {
> + .capable = viommu_capable,
> + .domain_alloc = viommu_domain_alloc,
> + .domain_free = viommu_domain_free,
> + .attach_dev = viommu_attach_dev,
> + .map = viommu_map,
> + .unmap = viommu_unmap,
> + .map_sg = default_iommu_map_sg,
> + .iova_to_phys = viommu_iova_to_phys,
> + .add_device = viommu_add_device,
> + .remove_device = viommu_remove_device,
> + .device_group = viommu_device_group,
> + .of_xlate = viommu_of_xlate,
> + .get_resv_regions = viommu_get_resv_regions,
> + .put_resv_regions = viommu_put_resv_regions,
> +};
> +
> +static int viommu_init_vq(struct viommu_dev *viommu)
> +{
> + struct virtio_device *vdev = dev_to_virtio(viommu->dev);
> + const char *name = "request";
> + void *ret;
> +
> + ret = virtio_find_single_vq(vdev, NULL, name);
> + if (IS_ERR(ret)) {
> + dev_err(viommu->dev, "cannot find VQ\n");
> + return PTR_ERR(ret);
> + }
> +
> + viommu->vq = ret;
> +
> + return 0;
> +}
> +
> +static int viommu_probe(struct virtio_device *vdev)
> +{
> + struct device *parent_dev = vdev->dev.parent;
> + struct viommu_dev *viommu = NULL;
> + struct device *dev = &vdev->dev;
> + u64 input_start = 0;
> + u64 input_end = -1UL;
> + int ret;
> +
> + viommu = kzalloc(sizeof(*viommu), GFP_KERNEL);
> + if (!viommu)
> + return -ENOMEM;
> +
> + spin_lock_init(&viommu->request_lock);
> + ida_init(&viommu->domain_ids);
> + viommu->dev = dev;
> + viommu->vdev = vdev;
> +
> + ret = viommu_init_vq(viommu);
> + if (ret)
> + goto err_free_viommu;
> +
> + virtio_cread(vdev, struct virtio_iommu_config, page_size_mask,
> + &viommu->pgsize_bitmap);
> +
> + if (!viommu->pgsize_bitmap) {
> + ret = -EINVAL;
> + goto err_free_viommu;
> + }
> +
> + viommu->domain_bits = 32;
> +
> + /* Optional features */
> + virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
> + struct virtio_iommu_config, input_range.start,
> + &input_start);
> +
> + virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
> + struct virtio_iommu_config, input_range.end,
> + &input_end);
> +
> + virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_BITS,
> + struct virtio_iommu_config, domain_bits,
> + &viommu->domain_bits);
> +
> + viommu->geometry = (struct iommu_domain_geometry) {
> + .aperture_start = input_start,
> + .aperture_end = input_end,
> + .force_aperture = true,
> + };
> +
> + viommu_ops.pgsize_bitmap = viommu->pgsize_bitmap;
> +
> + virtio_device_ready(vdev);
> +
> + ret = iommu_device_sysfs_add(&viommu->iommu, dev, NULL, "%s",
> + virtio_bus_name(vdev));
> + if (ret)
> + goto err_free_viommu;
> +
> + iommu_device_set_ops(&viommu->iommu, &viommu_ops);
> + iommu_device_set_fwnode(&viommu->iommu, parent_dev->fwnode);
> +
> + iommu_device_register(&viommu->iommu);
> +
> +#ifdef CONFIG_PCI
> + if (pci_bus_type.iommu_ops != &viommu_ops) {
> + pci_request_acs();
> + ret = bus_set_iommu(&pci_bus_type, &viommu_ops);
> + if (ret)
> + goto err_unregister;
> + }
> +#endif
> +#ifdef CONFIG_ARM_AMBA
> + if (amba_bustype.iommu_ops != &viommu_ops) {
> + ret = bus_set_iommu(&amba_bustype, &viommu_ops);
> + if (ret)
> + goto err_unregister;
> + }
> +#endif
> + if (platform_bus_type.iommu_ops != &viommu_ops) {
> + ret = bus_set_iommu(&platform_bus_type, &viommu_ops);
> + if (ret)
> + goto err_unregister;
> + }
> +
> + vdev->priv = viommu;
> +
> + dev_info(dev, "input address: %u bits\n",
> + order_base_2(viommu->geometry.aperture_end));
> + dev_info(dev, "page mask: %#llx\n", viommu->pgsize_bitmap);
> +
> + return 0;
> +
> +err_unregister:
> + iommu_device_unregister(&viommu->iommu);
> +
> +err_free_viommu:
> + kfree(viommu);
> +
> + return ret;
> +}
> +
> +static void viommu_remove(struct virtio_device *vdev)
> +{
> + struct viommu_dev *viommu = vdev->priv;
> +
> + iommu_device_unregister(&viommu->iommu);
> + kfree(viommu);
> +
> + dev_info(&vdev->dev, "device removed\n");
> +}
> +
> +static void viommu_config_changed(struct virtio_device *vdev)
> +{
> + dev_warn(&vdev->dev, "config changed\n");
> +}
> +
> +static unsigned int features[] = {
> + VIRTIO_IOMMU_F_MAP_UNMAP,
> + VIRTIO_IOMMU_F_DOMAIN_BITS,
> + VIRTIO_IOMMU_F_INPUT_RANGE,
> +};
> +
> +static struct virtio_device_id id_table[] = {
> + { VIRTIO_ID_IOMMU, VIRTIO_DEV_ANY_ID },
> + { 0 },
> +};
> +
> +static struct virtio_driver virtio_iommu_drv = {
> + .driver.name = KBUILD_MODNAME,
> + .driver.owner = THIS_MODULE,
> + .id_table = id_table,
> + .feature_table = features,
> + .feature_table_size = ARRAY_SIZE(features),
> + .probe = viommu_probe,
> + .remove = viommu_remove,
> + .config_changed = viommu_config_changed,
> +};
> +
> +module_virtio_driver(virtio_iommu_drv);
> +
> +IOMMU_OF_DECLARE(viommu, "virtio,mmio", NULL);
> +
> +MODULE_DESCRIPTION("Virtio IOMMU driver");
> +MODULE_AUTHOR("Jean-Philippe Brucker <jean-philippe.brucker@arm.com>");
> +MODULE_LICENSE("GPL v2");
> diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
> index 6d5c3b2d4f4d..934ed3d3cd3f 100644
> --- a/include/uapi/linux/virtio_ids.h
> +++ b/include/uapi/linux/virtio_ids.h
> @@ -43,5 +43,6 @@
> #define VIRTIO_ID_INPUT 18 /* virtio input */
> #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
> #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
> +#define VIRTIO_ID_IOMMU 61216 /* virtio IOMMU (temporary) */
>
> #endif /* _LINUX_VIRTIO_IDS_H */
> diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
> new file mode 100644
> index 000000000000..2b4cd2042897
> --- /dev/null
> +++ b/include/uapi/linux/virtio_iommu.h
> @@ -0,0 +1,140 @@
> +/*
> + * Virtio-iommu definition v0.5
> + *
> + * Copyright (C) 2017 ARM Ltd.
> + *
> + * This header is BSD licensed so anyone can use the definitions
> + * to implement compatible drivers/servers:
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + * 3. Neither the name of ARM Ltd. nor the names of its contributors
> + * may be used to endorse or promote products derived from this software
> + * without specific prior written permission.
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
> + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR
> + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
> + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
> + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +#ifndef _UAPI_LINUX_VIRTIO_IOMMU_H
> +#define _UAPI_LINUX_VIRTIO_IOMMU_H
> +
> +/* Feature bits */
> +#define VIRTIO_IOMMU_F_INPUT_RANGE 0
> +#define VIRTIO_IOMMU_F_DOMAIN_BITS 1
> +#define VIRTIO_IOMMU_F_MAP_UNMAP 2
> +#define VIRTIO_IOMMU_F_BYPASS 3
> +
> +struct virtio_iommu_config {
> + /* Supported page sizes */
> + __u64 page_size_mask;
I get a warning from the uapi header check:

./usr/include/linux/virtio_iommu.h:45: found __[us]{8,16,32,64} type
without #include <linux/types.h>
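The fix would be to add the include at the top of the header:

	#include <linux/types.h>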
> + /* Supported IOVA range */
> + struct virtio_iommu_range {
> + __u64 start;
> + __u64 end;
> + } input_range;
> + /* Max domain ID size */
> + __u8 domain_bits;
> +} __packed;
> +
> +/* Request types */
> +#define VIRTIO_IOMMU_T_ATTACH 0x01
> +#define VIRTIO_IOMMU_T_DETACH 0x02
> +#define VIRTIO_IOMMU_T_MAP 0x03
> +#define VIRTIO_IOMMU_T_UNMAP 0x04
> +
> +/* Status types */
> +#define VIRTIO_IOMMU_S_OK 0x00
> +#define VIRTIO_IOMMU_S_IOERR 0x01
> +#define VIRTIO_IOMMU_S_UNSUPP 0x02
> +#define VIRTIO_IOMMU_S_DEVERR 0x03
> +#define VIRTIO_IOMMU_S_INVAL 0x04
> +#define VIRTIO_IOMMU_S_RANGE 0x05
> +#define VIRTIO_IOMMU_S_NOENT 0x06
> +#define VIRTIO_IOMMU_S_FAULT 0x07
> +
> +struct virtio_iommu_req_head {
> + __u8 type;
> + __u8 reserved[3];
> +} __packed;
> +
> +struct virtio_iommu_req_tail {
> + __u8 status;
> + __u8 reserved[3];
> +} __packed;
> +
> +struct virtio_iommu_req_attach {
> + struct virtio_iommu_req_head head;
> +
> + __le32 domain;
> + __le32 endpoint;
> + __le32 reserved;
> +
> + struct virtio_iommu_req_tail tail;
> +} __packed;
> +
> +struct virtio_iommu_req_detach {
> + struct virtio_iommu_req_head head;
> +
> + __le32 endpoint;
> + __le32 reserved;
> +
> + struct virtio_iommu_req_tail tail;
> +} __packed;
> +
> +#define VIRTIO_IOMMU_MAP_F_READ (1 << 0)
> +#define VIRTIO_IOMMU_MAP_F_WRITE (1 << 1)
> +#define VIRTIO_IOMMU_MAP_F_EXEC (1 << 2)
> +
> +#define VIRTIO_IOMMU_MAP_F_MASK (VIRTIO_IOMMU_MAP_F_READ | \
> + VIRTIO_IOMMU_MAP_F_WRITE | \
> + VIRTIO_IOMMU_MAP_F_EXEC)
> +
> +struct virtio_iommu_req_map {
> + struct virtio_iommu_req_head head;
> +
> + __le32 domain;
> + __le64 virt_addr;
> + __le64 phys_addr;
> + __le64 size;
> + __le32 flags;
> +
> + struct virtio_iommu_req_tail tail;
> +} __packed;
> +
> +__packed
> +struct virtio_iommu_req_unmap {
> + struct virtio_iommu_req_head head;
> +
> + __le32 domain;
> + __le64 virt_addr;
> + __le64 size;
> + __le32 reserved;
> +
> + struct virtio_iommu_req_tail tail;
> +} __packed;
> +
> +union virtio_iommu_req {
> + struct virtio_iommu_req_head head;
> +
> + struct virtio_iommu_req_attach attach;
> + struct virtio_iommu_req_detach detach;
> + struct virtio_iommu_req_map map;
> + struct virtio_iommu_req_unmap unmap;
> +};
> +
> +#endif
>
Thanks
Eric