* [PATCH v7 1/7] vfio: introduce a vfio_dma type field
2016-04-19 17:24 [PATCH v7 0/7] KVM PCIe/MSI passthrough on ARM/ARM64: kernel part 3/3: vfio changes Eric Auger
@ 2016-04-19 17:24 ` Eric Auger
2016-04-19 17:24 ` [PATCH v7 2/7] vfio/type1: vfio_find_dma accepting a type argument Eric Auger
` (5 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Eric Auger @ 2016-04-19 17:24 UTC (permalink / raw)
To: linux-arm-kernel
We introduce a vfio_dma type since we will need to discriminate
different types of dma slots:
- VFIO_IOVA_USER: IOVA region used to map user vaddr
- VFIO_IOVA_RESERVED: IOVA region reserved to map host device PA such
as MSI doorbells
Signed-off-by: Eric Auger <eric.auger@linaro.org>
---
v6 -> v7:
- add VFIO_IOVA_ANY
- do not introduce yet any VFIO_IOVA_RESERVED handling
---
drivers/vfio/vfio_iommu_type1.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 75b24e9..aaf5a6c 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -53,6 +53,16 @@ module_param_named(disable_hugepages,
MODULE_PARM_DESC(disable_hugepages,
"Disable VFIO IOMMU support for IOMMU hugepages.");
+enum vfio_iova_type {
+ VFIO_IOVA_USER = 0, /* standard IOVA used to map user vaddr */
+ /*
+ * IOVA reserved to map special host physical addresses,
+ * MSI frames for instance
+ */
+ VFIO_IOVA_RESERVED,
+ VFIO_IOVA_ANY, /* matches any IOVA type */
+};
+
struct vfio_iommu {
struct list_head domain_list;
struct mutex lock;
@@ -75,6 +85,7 @@ struct vfio_dma {
unsigned long vaddr; /* Process virtual addr */
size_t size; /* Map size (bytes) */
int prot; /* IOMMU_READ/WRITE */
+ enum vfio_iova_type type; /* type of IOVA */
};
struct vfio_group {
--
1.9.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v7 2/7] vfio/type1: vfio_find_dma accepting a type argument
2016-04-19 17:24 [PATCH v7 0/7] KVM PCIe/MSI passthrough on ARM/ARM64: kernel part 3/3: vfio changes Eric Auger
2016-04-19 17:24 ` [PATCH v7 1/7] vfio: introduce a vfio_dma type field Eric Auger
@ 2016-04-19 17:24 ` Eric Auger
2016-04-19 17:24 ` [PATCH v7 3/7] vfio/type1: specialize remove_dma and replay according to type Eric Auger
` (4 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Eric Auger @ 2016-04-19 17:24 UTC (permalink / raw)
To: linux-arm-kernel
In our RB-tree we now have slots of different types (USER and RESERVED).
It becomes useful to be able to search for dma slots of a specific type or
any type. This patch proposes an implementation for that modality and also
changes the existing callers using the USER type.
Signed-off-by: Eric Auger <eric.auger@linaro.org>
---
drivers/vfio/vfio_iommu_type1.c | 63 ++++++++++++++++++++++++++++++++++-------
1 file changed, 53 insertions(+), 10 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index aaf5a6c..2d769d4 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -98,23 +98,64 @@ struct vfio_group {
* into DMA'ble space using the IOMMU
*/
-static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
- dma_addr_t start, size_t size)
+/**
+ * vfio_find_dma_from_node: looks for a dma slot intersecting a window
+ * from a given rb tree node
+ * @top: top rb tree node where the search starts (including this node)
+ * @start: window start
+ * @size: window size
+ * @type: window type
+ */
+static struct vfio_dma *vfio_find_dma_from_node(struct rb_node *top,
+ dma_addr_t start, size_t size,
+ enum vfio_iova_type type)
{
- struct rb_node *node = iommu->dma_list.rb_node;
+ struct rb_node *node = top;
+ struct vfio_dma *dma;
while (node) {
- struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
-
+ dma = rb_entry(node, struct vfio_dma, node);
if (start + size <= dma->iova)
node = node->rb_left;
else if (start >= dma->iova + dma->size)
node = node->rb_right;
else
+ break;
+ }
+ if (!node)
+ return NULL;
+
+ /* a dma slot intersects our window, check the type also matches */
+ if (type == VFIO_IOVA_ANY || dma->type == type)
+ return dma;
+
+ /* restart 2 searches skipping the current node */
+ if (start < dma->iova) {
+ dma = vfio_find_dma_from_node(node->rb_left, start,
+ size, type);
+ if (dma)
return dma;
}
+ if (start + size > dma->iova + dma->size)
+ dma = vfio_find_dma_from_node(node->rb_right, start,
+ size, type);
+ return dma;
+}
+
+/**
+ * vfio_find_dma: find a dma slot intersecting a given window
+ * @iommu: vfio iommu handle
+ * @start: window base iova
+ * @size: window size
+ * @type: window type
+ */
+static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
+ dma_addr_t start, size_t size,
+ enum vfio_iova_type type)
+{
+ struct rb_node *top_node = iommu->dma_list.rb_node;
- return NULL;
+ return vfio_find_dma_from_node(top_node, start, size, type);
}
static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
@@ -488,19 +529,21 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
* mappings within the range.
*/
if (iommu->v2) {
- dma = vfio_find_dma(iommu, unmap->iova, 0);
+ dma = vfio_find_dma(iommu, unmap->iova, 0, VFIO_IOVA_USER);
if (dma && dma->iova != unmap->iova) {
ret = -EINVAL;
goto unlock;
}
- dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0);
+ dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0,
+ VFIO_IOVA_USER);
if (dma && dma->iova + dma->size != unmap->iova + unmap->size) {
ret = -EINVAL;
goto unlock;
}
}
- while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
+ while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size,
+ VFIO_IOVA_USER))) {
if (!iommu->v2 && unmap->iova > dma->iova)
break;
unmapped += dma->size;
@@ -604,7 +647,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
mutex_lock(&iommu->lock);
- if (vfio_find_dma(iommu, iova, size)) {
+ if (vfio_find_dma(iommu, iova, size, VFIO_IOVA_ANY)) {
mutex_unlock(&iommu->lock);
return -EEXIST;
}
--
1.9.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v7 3/7] vfio/type1: specialize remove_dma and replay according to type
2016-04-19 17:24 [PATCH v7 0/7] KVM PCIe/MSI passthrough on ARM/ARM64: kernel part 3/3: vfio changes Eric Auger
2016-04-19 17:24 ` [PATCH v7 1/7] vfio: introduce a vfio_dma type field Eric Auger
2016-04-19 17:24 ` [PATCH v7 2/7] vfio/type1: vfio_find_dma accepting a type argument Eric Auger
@ 2016-04-19 17:24 ` Eric Auger
2016-04-20 3:05 ` kbuild test robot
2016-04-19 17:24 ` [PATCH v7 4/7] vfio: allow reserved iova registration Eric Auger
` (3 subsequent siblings)
6 siblings, 1 reply; 12+ messages in thread
From: Eric Auger @ 2016-04-19 17:24 UTC (permalink / raw)
To: linux-arm-kernel
Before allowing the end-user to create VFIO_IOVA_RESERVED dma slots,
let's implement the expected behavior for removal and replay. As opposed
to user dma slots, the physical pages bound to reserved IOVA do not need
to be pinned/unpinned. Also we currently handle a single reserved iova
domain. Any attempt to remove the single reserved dma slot ends up
deleting the underlying iova domain for each iommu domain.
Signed-off-by: Eric Auger <eric.auger@linaro.org>
---
drivers/vfio/vfio_iommu_type1.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 2d769d4..93c17d9 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -36,6 +36,7 @@
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/workqueue.h>
+#include <linux/dma-reserved-iommu.h>
#define DRIVER_VERSION "0.2"
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
@@ -445,9 +446,20 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
vfio_lock_acct(-unlocked);
}
+static void vfio_destroy_reserved(struct vfio_iommu *iommu)
+{
+ struct vfio_domain *d;
+
+ list_for_each_entry(d, &iommu->domain_list, next)
+ iommu_free_reserved_iova_domain(d->domain);
+}
+
static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
{
- vfio_unmap_unpin(iommu, dma);
+ if (likely(dma->type == VFIO_IOVA_USER))
+ vfio_unmap_unpin(iommu, dma);
+ else
+ vfio_destroy_reserved(iommu);
vfio_unlink_dma(iommu, dma);
kfree(dma);
}
@@ -727,6 +739,9 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
dma = rb_entry(n, struct vfio_dma, node);
iova = dma->iova;
+ if (dma->type == VFIO_IOVA_RESERVED)
+ continue;
+
while (iova < dma->iova + dma->size) {
phys_addr_t phys = iommu_iova_to_phys(d->domain, iova);
size_t size;
--
1.9.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v7 4/7] vfio: allow reserved iova registration
2016-04-19 17:24 [PATCH v7 0/7] KVM PCIe/MSI passthrough on ARM/ARM64: kernel part 3/3: vfio changes Eric Auger
` (2 preceding siblings ...)
2016-04-19 17:24 ` [PATCH v7 3/7] vfio/type1: specialize remove_dma and replay according to type Eric Auger
@ 2016-04-19 17:24 ` Eric Auger
2016-04-19 17:24 ` [PATCH v7 5/7] vfio/type1: also check IRQ remapping capability at msi domain Eric Auger
` (2 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Eric Auger @ 2016-04-19 17:24 UTC (permalink / raw)
To: linux-arm-kernel
The user is allowed to [un]register a reserved IOVA range by using the
DMA MAP API and setting the new flag: VFIO_DMA_MAP_FLAG_MSI_RESERVED_IOVA.
It provides the base address and the size. This region is stored in the
vfio_dma rb tree. At that point the iova range is not mapped to any target
address yet. The host kernel will use those iova when needed, typically
when the VFIO-PCI driver allocates its MSIs.
Signed-off-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
---
v6 -> v7:
- use iommu_free_reserved_iova_domain
- convey prot attributes downto dma-reserved-iommu iova domain creation
- reserved bindings teardown now performed on iommu domain destruction
- rename VFIO_DMA_MAP_FLAG_MSI_RESERVED_IOVA into
VFIO_DMA_MAP_FLAG_RESERVED_MSI_IOVA
- change title
- pass the protection attribute to dma-reserved-iommu API
v3 -> v4:
- use iommu_alloc/free_reserved_iova_domain exported by dma-reserved-iommu
- protect vfio_register_reserved_iova_range implementation with
CONFIG_IOMMU_DMA_RESERVED
- handle unregistration by user-space and on vfio_iommu_type1 release
v1 -> v2:
- set returned value according to alloc_reserved_iova_domain result
- free the iova domains in case any error occurs
RFC v1 -> v1:
- takes into account Alex comments, based on
[RFC PATCH 1/6] vfio: Add interface for add/del reserved iova region:
- use the existing dma map/unmap ioctl interface with a flag to register
a reserved IOVA range. A single reserved iova region is allowed.
---
drivers/vfio/vfio_iommu_type1.c | 138 +++++++++++++++++++++++++++++++++++++++-
include/uapi/linux/vfio.h | 9 ++-
2 files changed, 145 insertions(+), 2 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 93c17d9..fa6b8b1 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -454,6 +454,27 @@ static void vfio_destroy_reserved(struct vfio_iommu *iommu)
iommu_free_reserved_iova_domain(d->domain);
}
+static int vfio_create_reserved(struct vfio_iommu *iommu,
+ dma_addr_t iova, size_t size,
+ int prot, unsigned long order)
+{
+ struct vfio_domain *d;
+ int ret = 0;
+
+ list_for_each_entry(d, &iommu->domain_list, next) {
+ ret = iommu_alloc_reserved_iova_domain(d->domain, iova,
+ size, prot, order);
+ if (ret)
+ break;
+ }
+
+ if (ret) {
+ list_for_each_entry(d, &iommu->domain_list, next)
+ iommu_free_reserved_iova_domain(d->domain);
+ }
+ return ret;
+}
+
static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
{
if (likely(dma->type == VFIO_IOVA_USER))
@@ -705,6 +726,110 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
return ret;
}
+static int vfio_register_reserved_iova_range(struct vfio_iommu *iommu,
+ struct vfio_iommu_type1_dma_map *map)
+{
+ dma_addr_t iova = map->iova;
+ size_t size = map->size;
+ int ret = 0, prot = 0;
+ struct vfio_dma *dma;
+ unsigned long order;
+ uint64_t mask;
+
+ /* Verify that none of our __u64 fields overflow */
+ if (map->size != size || map->iova != iova)
+ return -EINVAL;
+
+ order = __ffs(vfio_pgsize_bitmap(iommu));
+ mask = ((uint64_t)1 << order) - 1;
+
+ WARN_ON(mask & PAGE_MASK);
+
+ if (!size || (size | iova) & mask)
+ return -EINVAL;
+
+ /* Don't allow IOVA address wrap */
+ if (iova + size - 1 < iova)
+ return -EINVAL;
+
+ mutex_lock(&iommu->lock);
+
+ if (vfio_find_dma(iommu, iova, size, VFIO_IOVA_ANY)) {
+ ret = -EEXIST;
+ goto unlock;
+ }
+
+ dma = kzalloc(sizeof(*dma), GFP_KERNEL);
+ if (!dma) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ dma->iova = iova;
+ dma->size = size;
+ dma->type = VFIO_IOVA_RESERVED;
+
+ if (map->flags & VFIO_DMA_MAP_FLAG_READ)
+ prot = IOMMU_READ;
+ if (map->flags & VFIO_DMA_MAP_FLAG_WRITE)
+ prot |= IOMMU_WRITE;
+
+ ret = vfio_create_reserved(iommu, iova, size, prot, order);
+ if (ret)
+ goto free_unlock;
+
+ vfio_link_dma(iommu, dma);
+ goto unlock;
+
+free_unlock:
+ kfree(dma);
+unlock:
+ mutex_unlock(&iommu->lock);
+ return ret;
+}
+
+static int vfio_unregister_reserved_iova_range(struct vfio_iommu *iommu,
+ struct vfio_iommu_type1_dma_unmap *unmap)
+{
+ dma_addr_t iova = unmap->iova;
+ struct vfio_dma *dma;
+ size_t size = unmap->size;
+ uint64_t mask;
+ unsigned long order;
+ int ret = -EINVAL;
+
+ /* Verify that none of our __u64 fields overflow */
+ if (unmap->size != size || unmap->iova != iova)
+ return ret;
+
+ order = __ffs(vfio_pgsize_bitmap(iommu));
+ mask = ((uint64_t)1 << order) - 1;
+
+ WARN_ON(mask & PAGE_MASK);
+
+ if (!size || (size | iova) & mask)
+ return ret;
+
+ /* Don't allow IOVA address wrap */
+ if (iova + size - 1 < iova)
+ return ret;
+
+ mutex_lock(&iommu->lock);
+
+ dma = vfio_find_dma(iommu, iova, size, VFIO_IOVA_RESERVED);
+
+ if (dma && (dma->iova == iova) && (dma->size == size)) {
+ unmap->size = dma->size;
+ vfio_remove_dma(iommu, dma);
+ ret = 0;
+ goto unlock;
+ }
+ unmap->size = 0;
+unlock:
+ mutex_unlock(&iommu->lock);
+ return ret;
+}
+
static int vfio_bus_type(struct device *dev, void *data)
{
struct bus_type **bus = data;
@@ -1074,7 +1199,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
} else if (cmd == VFIO_IOMMU_MAP_DMA) {
struct vfio_iommu_type1_dma_map map;
uint32_t mask = VFIO_DMA_MAP_FLAG_READ |
- VFIO_DMA_MAP_FLAG_WRITE;
+ VFIO_DMA_MAP_FLAG_WRITE |
+ VFIO_DMA_MAP_FLAG_RESERVED_MSI_IOVA;
minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
@@ -1084,6 +1210,9 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
if (map.argsz < minsz || map.flags & ~mask)
return -EINVAL;
+ if (map.flags & VFIO_DMA_MAP_FLAG_RESERVED_MSI_IOVA)
+ return vfio_register_reserved_iova_range(iommu, &map);
+
return vfio_dma_do_map(iommu, &map);
} else if (cmd == VFIO_IOMMU_UNMAP_DMA) {
@@ -1098,6 +1227,13 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
if (unmap.argsz < minsz || unmap.flags)
return -EINVAL;
+ if (unmap.flags & VFIO_DMA_MAP_FLAG_RESERVED_MSI_IOVA) {
+ ret = vfio_unregister_reserved_iova_range(iommu,
+ &unmap);
+ if (ret)
+ return ret;
+ }
+
ret = vfio_dma_do_unmap(iommu, &unmap);
if (ret)
return ret;
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 255a211..0637f35 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -498,12 +498,18 @@ struct vfio_iommu_type1_info {
*
* Map process virtual addresses to IO virtual addresses using the
* provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
+ *
+ * In case RESERVED_MSI_IOVA flag is set, the API only aims at registering an
+ * IOVA region that will be used on some platforms to map the host MSI frames.
+ * In that specific case, vaddr is ignored.
*/
struct vfio_iommu_type1_dma_map {
__u32 argsz;
__u32 flags;
#define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */
#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */
+/* reserved iova for MSI vectors*/
+#define VFIO_DMA_MAP_FLAG_RESERVED_MSI_IOVA (1 << 2)
__u64 vaddr; /* Process virtual address */
__u64 iova; /* IO virtual address */
__u64 size; /* Size of mapping (bytes) */
@@ -519,7 +525,8 @@ struct vfio_iommu_type1_dma_map {
* Caller sets argsz. The actual unmapped size is returned in the size
* field. No guarantee is made to the user that arbitrary unmaps of iova
* or size different from those used in the original mapping call will
- * succeed.
+ * succeed. A Reserved DMA region must be unmapped with RESERVED_MSI_IOVA
+ * flag set.
*/
struct vfio_iommu_type1_dma_unmap {
__u32 argsz;
--
1.9.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v7 5/7] vfio/type1: also check IRQ remapping capability at msi domain
2016-04-19 17:24 [PATCH v7 0/7] KVM PCIe/MSI passthrough on ARM/ARM64: kernel part 3/3: vfio changes Eric Auger
` (3 preceding siblings ...)
2016-04-19 17:24 ` [PATCH v7 4/7] vfio: allow reserved iova registration Eric Auger
@ 2016-04-19 17:24 ` Eric Auger
2016-04-19 17:24 ` [PATCH v7 6/7] iommu/arm-smmu: do not advertise IOMMU_CAP_INTR_REMAP Eric Auger
2016-04-19 17:24 ` [PATCH v7 7/7] vfio/type1: return MSI mapping requirements with VFIO_IOMMU_GET_INFO Eric Auger
6 siblings, 0 replies; 12+ messages in thread
From: Eric Auger @ 2016-04-19 17:24 UTC (permalink / raw)
To: linux-arm-kernel
On x86 IRQ remapping is abstracted by the IOMMU. On ARM this is abstracted
by the msi controller. vfio_safe_irq_domain allows to check whether
interrupts are "safe" for a given device. They are if the device does
not use MSI or if the device uses MSI and the msi-parent controller
supports IRQ remapping.
Then we check at group level if all devices have safe interrupts: if not,
we only allow the group to be attached if allow_unsafe_interrupts is set.
At this point ARM sMMU still advertises IOMMU_CAP_INTR_REMAP. This is
changed in next patch.
Signed-off-by: Eric Auger <eric.auger@linaro.org>
---
v3 -> v4:
- rename vfio_msi_parent_irq_remapping_capable into vfio_safe_irq_domain
and irq_remapping into safe_irq_domains
v2 -> v3:
- protect vfio_msi_parent_irq_remapping_capable with
CONFIG_GENERIC_MSI_IRQ_DOMAIN
---
drivers/vfio/vfio_iommu_type1.c | 44 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 42 insertions(+), 2 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index fa6b8b1..51b1f15 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -37,6 +37,8 @@
#include <linux/vfio.h>
#include <linux/workqueue.h>
#include <linux/dma-reserved-iommu.h>
+#include <linux/irqdomain.h>
+#include <linux/msi.h>
#define DRIVER_VERSION "0.2"
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
@@ -842,6 +844,33 @@ static int vfio_bus_type(struct device *dev, void *data)
return 0;
}
+/**
+ * vfio_safe_irq_domain: returns whether the irq domain
+ * the device is attached to is safe with respect to MSI isolation.
+ * If the irq domain is not an MSI domain, we return it is safe.
+ *
+ * @dev: device handle
+ * @data: unused
+ * returns 0 if the irq domain is safe, -1 if not.
+ */
+static int vfio_safe_irq_domain(struct device *dev, void *data)
+{
+#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+ struct irq_domain *domain;
+ struct msi_domain_info *info;
+
+ domain = dev_get_msi_domain(dev);
+ if (!domain)
+ return 0;
+
+ info = msi_get_domain_info(domain);
+
+ if (!(info->flags & MSI_FLAG_IRQ_REMAPPING))
+ return -1;
+#endif
+ return 0;
+}
+
static int vfio_iommu_replay(struct vfio_iommu *iommu,
struct vfio_domain *domain)
{
@@ -935,7 +964,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
struct vfio_group *group, *g;
struct vfio_domain *domain, *d;
struct bus_type *bus = NULL;
- int ret;
+ int ret, safe_irq_domains;
mutex_lock(&iommu->lock);
@@ -958,6 +987,13 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
group->iommu_group = iommu_group;
+ /*
+ * Determine if all the devices of the group have a safe irq domain
+ * with respect to MSI isolation
+ */
+ safe_irq_domains = !iommu_group_for_each_dev(iommu_group, &bus,
+ vfio_safe_irq_domain);
+
/* Determine bus_type in order to allocate a domain */
ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
if (ret)
@@ -985,8 +1021,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
INIT_LIST_HEAD(&domain->group_list);
list_add(&group->next, &domain->group_list);
+ /*
+ * to advertise safe interrupts either the IOMMU or the MSI controllers
+ * must support IRQ remapping/interrupt translation
+ */
if (!allow_unsafe_interrupts &&
- !iommu_capable(bus, IOMMU_CAP_INTR_REMAP)) {
+ (!iommu_capable(bus, IOMMU_CAP_INTR_REMAP) && !safe_irq_domains)) {
pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
__func__);
ret = -EPERM;
--
1.9.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v7 6/7] iommu/arm-smmu: do not advertise IOMMU_CAP_INTR_REMAP
2016-04-19 17:24 [PATCH v7 0/7] KVM PCIe/MSI passthrough on ARM/ARM64: kernel part 3/3: vfio changes Eric Auger
` (4 preceding siblings ...)
2016-04-19 17:24 ` [PATCH v7 5/7] vfio/type1: also check IRQ remapping capability at msi domain Eric Auger
@ 2016-04-19 17:24 ` Eric Auger
2016-04-22 11:16 ` Robin Murphy
2016-04-19 17:24 ` [PATCH v7 7/7] vfio/type1: return MSI mapping requirements with VFIO_IOMMU_GET_INFO Eric Auger
6 siblings, 1 reply; 12+ messages in thread
From: Eric Auger @ 2016-04-19 17:24 UTC (permalink / raw)
To: linux-arm-kernel
Do not advertise IOMMU_CAP_INTR_REMAP for arm-smmu(-v3). Indeed the
irq_remapping capability is abstracted on irqchip side for ARM as
opposed to Intel IOMMU featuring IRQ remapping HW.
So to check IRQ remapping capability, the msi domain needs to be
checked instead.
This commit needs to be applied after "vfio/type1: also check IRQ
remapping capability at msi domain" else the legacy interrupt
assignment gets broken with arm-smmu.
Signed-off-by: Eric Auger <eric.auger@linaro.org>
---
drivers/iommu/arm-smmu-v3.c | 3 ++-
drivers/iommu/arm-smmu.c | 3 ++-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index afd0dac..1d0106c 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1386,7 +1386,8 @@ static bool arm_smmu_capable(enum iommu_cap cap)
case IOMMU_CAP_CACHE_COHERENCY:
return true;
case IOMMU_CAP_INTR_REMAP:
- return true; /* MSIs are just memory writes */
+ /* interrupt translation handled at MSI controller level */
+ return false;
case IOMMU_CAP_NOEXEC:
return true;
default:
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 492339f..6232b2a 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1312,7 +1312,8 @@ static bool arm_smmu_capable(enum iommu_cap cap)
*/
return true;
case IOMMU_CAP_INTR_REMAP:
- return true; /* MSIs are just memory writes */
+ /* interrupt translation handled at MSI controller level */
+ return false;
case IOMMU_CAP_NOEXEC:
return true;
default:
--
1.9.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v7 6/7] iommu/arm-smmu: do not advertise IOMMU_CAP_INTR_REMAP
2016-04-19 17:24 ` [PATCH v7 6/7] iommu/arm-smmu: do not advertise IOMMU_CAP_INTR_REMAP Eric Auger
@ 2016-04-22 11:16 ` Robin Murphy
2016-04-22 11:39 ` Eric Auger
0 siblings, 1 reply; 12+ messages in thread
From: Robin Murphy @ 2016-04-22 11:16 UTC (permalink / raw)
To: linux-arm-kernel
Hi Eric, Alex,
On 19/04/16 18:24, Eric Auger wrote:
> Do not advertise IOMMU_CAP_INTR_REMAP for arm-smmu(-v3). Indeed the
> irq_remapping capability is abstracted on irqchip side for ARM as
> opposed to Intel IOMMU featuring IRQ remapping HW.
>
> So to check IRQ remapping capability, the msi domain needs to be
> checked instead.
>
> This commit needs to be applied after "vfio/type1: also check IRQ
> remapping capability at msi domain" else the legacy interrupt
> assignment gets broken with arm-smmu.
Hmm, that smells of papering over a different problem. I may have missed
it, but I don't see anything changing legacy interrupt behaviour in this
series - are legacy INTx (or platform) interrupts intrinsically safe
because they're physically wired, or intrinsically unsafe because they
could be shared? If it's the latter then I don't see how the IOMMU or
MSI controller changes anything in that respect, and if it's the former
then surely we should support that right now without the SMMU having to
lie about MSI isolation? I started looking into it but I'm a bit lost...
Robin.
> Signed-off-by: Eric Auger <eric.auger@linaro.org>
> ---
> drivers/iommu/arm-smmu-v3.c | 3 ++-
> drivers/iommu/arm-smmu.c | 3 ++-
> 2 files changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index afd0dac..1d0106c 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -1386,7 +1386,8 @@ static bool arm_smmu_capable(enum iommu_cap cap)
> case IOMMU_CAP_CACHE_COHERENCY:
> return true;
> case IOMMU_CAP_INTR_REMAP:
> - return true; /* MSIs are just memory writes */
> + /* interrupt translation handled at MSI controller level */
> + return false;
> case IOMMU_CAP_NOEXEC:
> return true;
> default:
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index 492339f..6232b2a 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -1312,7 +1312,8 @@ static bool arm_smmu_capable(enum iommu_cap cap)
> */
> return true;
> case IOMMU_CAP_INTR_REMAP:
> - return true; /* MSIs are just memory writes */
> + /* interrupt translation handled at MSI controller level */
> + return false;
> case IOMMU_CAP_NOEXEC:
> return true;
> default:
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v7 6/7] iommu/arm-smmu: do not advertise IOMMU_CAP_INTR_REMAP
2016-04-22 11:16 ` Robin Murphy
@ 2016-04-22 11:39 ` Eric Auger
2016-04-22 15:40 ` Robin Murphy
0 siblings, 1 reply; 12+ messages in thread
From: Eric Auger @ 2016-04-22 11:39 UTC (permalink / raw)
To: linux-arm-kernel
Hi Robin,
On 04/22/2016 01:16 PM, Robin Murphy wrote:
> Hi Eric, Alex,
>
> On 19/04/16 18:24, Eric Auger wrote:
>> Do not advertise IOMMU_CAP_INTR_REMAP for arm-smmu(-v3). Indeed the
>> irq_remapping capability is abstracted on irqchip side for ARM as
>> opposed to Intel IOMMU featuring IRQ remapping HW.
>>
>> So to check IRQ remapping capability, the msi domain needs to be
>> checked instead.
>>
>> This commit needs to be applied after "vfio/type1: also check IRQ
>> remapping capability at msi domain" else the legacy interrupt
>> assignment gets broken with arm-smmu.
>
> Hmm, that smells of papering over a different problem. I may have missed
> it, but I don't see anything changing legacy interrupt behaviour in this
> series - are legacy INTx (or platform) interrupts intrinsically safe
> because they're physically wired, or intrinsically unsafe because they
> could be shared?
I think it is safe. With legacy/platform interrupts we have:
vfio pci driver physical IRQ handler signals an irqfd.
upon this irqfd signaling KVM injects a virtual IRQ.
So the assigned device does not have any way to trigger a storm of
interrupts on the host, as opposed to with MSI.
Does it make sense to you?
Best Regards
Eric
If it's the latter then I don't see how the IOMMU or
> MSI controller changes anything in that respect, and if it's the former
> then surely we should support that right now without the SMMU having to
> lie about MSI isolation? I started looking into it but I'm a bit lost...
>
> Robin.
>
>> Signed-off-by: Eric Auger <eric.auger@linaro.org>
>> ---
>> drivers/iommu/arm-smmu-v3.c | 3 ++-
>> drivers/iommu/arm-smmu.c | 3 ++-
>> 2 files changed, 4 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>> index afd0dac..1d0106c 100644
>> --- a/drivers/iommu/arm-smmu-v3.c
>> +++ b/drivers/iommu/arm-smmu-v3.c
>> @@ -1386,7 +1386,8 @@ static bool arm_smmu_capable(enum iommu_cap cap)
>> case IOMMU_CAP_CACHE_COHERENCY:
>> return true;
>> case IOMMU_CAP_INTR_REMAP:
>> - return true; /* MSIs are just memory writes */
>> + /* interrupt translation handled at MSI controller level */
>> + return false;
>> case IOMMU_CAP_NOEXEC:
>> return true;
>> default:
>> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
>> index 492339f..6232b2a 100644
>> --- a/drivers/iommu/arm-smmu.c
>> +++ b/drivers/iommu/arm-smmu.c
>> @@ -1312,7 +1312,8 @@ static bool arm_smmu_capable(enum iommu_cap cap)
>> */
>> return true;
>> case IOMMU_CAP_INTR_REMAP:
>> - return true; /* MSIs are just memory writes */
>> + /* interrupt translation handled at MSI controller level */
>> + return false;
>> case IOMMU_CAP_NOEXEC:
>> return true;
>> default:
>>
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v7 6/7] iommu/arm-smmu: do not advertise IOMMU_CAP_INTR_REMAP
2016-04-22 11:39 ` Eric Auger
@ 2016-04-22 15:40 ` Robin Murphy
0 siblings, 0 replies; 12+ messages in thread
From: Robin Murphy @ 2016-04-22 15:40 UTC (permalink / raw)
To: linux-arm-kernel
On 22/04/16 12:39, Eric Auger wrote:
> Hi Robin,
> On 04/22/2016 01:16 PM, Robin Murphy wrote:
>> Hi Eric, Alex,
>>
>> On 19/04/16 18:24, Eric Auger wrote:
>>> Do not advertise IOMMU_CAP_INTR_REMAP for arm-smmu(-v3). Indeed the
>>> irq_remapping capability is abstracted on irqchip side for ARM as
>>> opposed to Intel IOMMU featuring IRQ remapping HW.
>>>
>>> So to check IRQ remapping capability, the msi domain needs to be
>>> checked instead.
>>>
>>> This commit needs to be applied after "vfio/type1: also check IRQ
>>> remapping capability at msi domain" else the legacy interrupt
>>> assignment gets broken with arm-smmu.
>>
>> Hmm, that smells of papering over a different problem. I may have missed
>> it, but I don't see anything changing legacy interrupt behaviour in this
>> series - are legacy INTx (or platform) interrupts intrinsically safe
>> because they're physically wired, or intrinsically unsafe because they
>> could be shared?
>
> I think it is safe. With legacy/platform interrupts we have:
> vfio pci driver physical IRQ handler signals an irqfd.
> upon this irqfd signaling KVM injects a virtual IRQ.
>
> So the assigned device does not have any way to trigger a storm of
> interrupts on the host, as opposed to with MSI.
>
> Does it make sense to you?
I think so, thanks for the explanation. In that case, I'm strongly in
favour of applying this patch and un-breaking legacy interrupts
regardless of MSI support. I'll keep investigating to see what I can
figure out.
Robin.
> Best Regards
>
> Eric
>
> If it's the latter then I don't see how the IOMMU or
>> MSI controller changes anything in that respect, and if it's the former
>> then surely we should support that right now without the SMMU having to
>> lie about MSI isolation? I started looking into it but I'm a bit lost...
>>
>> Robin.
>>
>>> Signed-off-by: Eric Auger <eric.auger@linaro.org>
>>> ---
>>> drivers/iommu/arm-smmu-v3.c | 3 ++-
>>> drivers/iommu/arm-smmu.c | 3 ++-
>>> 2 files changed, 4 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>>> index afd0dac..1d0106c 100644
>>> --- a/drivers/iommu/arm-smmu-v3.c
>>> +++ b/drivers/iommu/arm-smmu-v3.c
>>> @@ -1386,7 +1386,8 @@ static bool arm_smmu_capable(enum iommu_cap cap)
>>> case IOMMU_CAP_CACHE_COHERENCY:
>>> return true;
>>> case IOMMU_CAP_INTR_REMAP:
>>> - return true; /* MSIs are just memory writes */
>>> + /* interrupt translation handled at MSI controller level */
>>> + return false;
>>> case IOMMU_CAP_NOEXEC:
>>> return true;
>>> default:
>>> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
>>> index 492339f..6232b2a 100644
>>> --- a/drivers/iommu/arm-smmu.c
>>> +++ b/drivers/iommu/arm-smmu.c
>>> @@ -1312,7 +1312,8 @@ static bool arm_smmu_capable(enum iommu_cap cap)
>>> */
>>> return true;
>>> case IOMMU_CAP_INTR_REMAP:
>>> - return true; /* MSIs are just memory writes */
>>> + /* interrupt translation handled at MSI controller level */
>>> + return false;
>>> case IOMMU_CAP_NOEXEC:
>>> return true;
>>> default:
>>>
>>
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v7 7/7] vfio/type1: return MSI mapping requirements with VFIO_IOMMU_GET_INFO
2016-04-19 17:24 [PATCH v7 0/7] KVM PCIe/MSI passthrough on ARM/ARM64: kernel part 3/3: vfio changes Eric Auger
` (5 preceding siblings ...)
2016-04-19 17:24 ` [PATCH v7 6/7] iommu/arm-smmu: do not advertise IOMMU_CAP_INTR_REMAP Eric Auger
@ 2016-04-19 17:24 ` Eric Auger
6 siblings, 0 replies; 12+ messages in thread
From: Eric Auger @ 2016-04-19 17:24 UTC (permalink / raw)
To: linux-arm-kernel
This patch allows the user-space to know whether MSI addresses need to
be mapped in the IOMMU. The user-space uses VFIO_IOMMU_GET_INFO ioctl and
IOMMU_INFO_REQUIRE_MSI_MAP gets set if they need to.
The computation of the number of IOVA pages to be provided by the user
space will be implemented in a separate patch using capability chains.
Signed-off-by: Eric Auger <eric.auger@linaro.org>
---
v6 -> v7:
- remove the computation of the number of IOVA pages to be provisioned.
This number depends on the domain/group/device topology which can
dynamically change. Let's instead rely on an arbitrary max depending
on the system
v4 -> v5:
- move msi_info and ret declaration within the conditional code
v3 -> v4:
- replace former vfio_domains_require_msi_mapping by
more complex computation of MSI mapping requirements, especially the
number of pages to be provided by the user-space.
- reword patch title
RFC v1 -> v1:
- derived from
[RFC PATCH 3/6] vfio: Extend iommu-info to return MSIs automap state
- renamed allow_msi_reconfig into require_msi_mapping
- fixed VFIO_IOMMU_GET_INFO
---
drivers/vfio/vfio_iommu_type1.c | 24 ++++++++++++++++++++++++
include/uapi/linux/vfio.h | 5 ++++-
2 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 51b1f15..a63ce6f 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -310,6 +310,27 @@ static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
}
/*
+ * vfio_domains_require_msi_mapping: return whether MSI doorbells must be
+ * iommu mapped
+ *
+ * returns true if msi mapping is requested
+ */
+static bool vfio_domains_require_msi_mapping(struct vfio_iommu *iommu)
+{
+ struct vfio_domain *d;
+ bool flag;
+
+ mutex_lock(&iommu->lock);
+ /* All domains have same require_msi_map property, pick first */
+ d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
+ flag = (!(iommu_domain_get_attr(d->domain,
+ DOMAIN_ATTR_MSI_MAPPING, NULL)));
+
+ mutex_unlock(&iommu->lock);
+
+ return flag;
+}
+/*
* Attempt to pin pages. We really don't want to track all the pfns and
* the iommu can only map chunks of consecutive pfns anyway, so get the
* first page and all consecutive pages with the same locking.
@@ -1231,6 +1252,9 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
info.flags = VFIO_IOMMU_INFO_PGSIZES;
+ if (vfio_domains_require_msi_mapping(iommu))
+ info.flags |= VFIO_IOMMU_INFO_REQUIRE_MSI_MAP;
+
info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
return copy_to_user((void __user *)arg, &info, minsz) ?
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 0637f35..4d9c97d 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -488,6 +488,7 @@ struct vfio_iommu_type1_info {
__u32 argsz;
__u32 flags;
#define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */
+#define VFIO_IOMMU_INFO_REQUIRE_MSI_MAP (1 << 1)/* MSI must be mapped */
__u64 iova_pgsizes; /* Bitmap of supported page sizes */
};
@@ -501,7 +502,9 @@ struct vfio_iommu_type1_info {
*
* In case RESERVED_MSI_IOVA flag is set, the API only aims at registering an
* IOVA region that will be used on some platforms to map the host MSI frames.
- * In that specific case, vaddr is ignored.
+ * In that specific case, vaddr is ignored. The requirement for provisioning
+ * such reserved IOVA range can be checked by calling VFIO_IOMMU_GET_INFO and
+ * testing the VFIO_IOMMU_INFO_REQUIRE_MSI_MAP flag.
*/
struct vfio_iommu_type1_dma_map {
__u32 argsz;
--
1.9.1
^ permalink raw reply related [flat|nested] 12+ messages in thread