From: Alex Mastro <amastro@fb.com>
To: Alex Williamson <alex@shazbot.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>,
Alejandro Jimenez <alejandro.j.jimenez@oracle.com>,
David Matlack <dmatlack@google.com>, <kvm@vger.kernel.org>,
<linux-kernel@vger.kernel.org>, Alex Mastro <amastro@fb.com>,
Jason Gunthorpe <jgg@ziepe.ca>
Subject: [PATCH v5 3/5] vfio/type1: handle DMA map/unmap up to the addressable limit
Date: Mon, 27 Oct 2025 10:33:43 -0700 [thread overview]
Message-ID: <20251027-fix-unmap-v5-3-4f0fcf8ffb7d@fb.com> (raw)
In-Reply-To: <20251027-fix-unmap-v5-0-4f0fcf8ffb7d@fb.com>
Before this commit, it was possible to create end of address space
mappings, but unmapping them via VFIO_IOMMU_UNMAP_DMA, replaying them
for newly added iommu domains, and querying their dirty pages via
VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP was broken due to bugs caused by
comparisons against (iova + size) expressions, which overflow to zero.
This commit handles DMA map/unmap operations up to the addressable
limit by comparing against inclusive end-of-range limits, and changing
iteration to perform relative traversals across range sizes, rather than
absolute traversals across addresses.
vfio_link_dma inserts a zero-sized vfio_dma into the rb-tree, and is
only used for that purpose, so discard the size from consideration for
the insertion point.
Tested-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Fixes: 73fa0d10d077 ("vfio: Type1 IOMMU implementation")
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Signed-off-by: Alex Mastro <amastro@fb.com>
---
drivers/vfio/vfio_iommu_type1.c | 77 ++++++++++++++++++++++-------------------
1 file changed, 42 insertions(+), 35 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 48bcc0633d44..5167bec14e36 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -168,12 +168,14 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
{
struct rb_node *node = iommu->dma_list.rb_node;
+ WARN_ON(!size);
+
while (node) {
struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
- if (start + size <= dma->iova)
+ if (start + size - 1 < dma->iova)
node = node->rb_left;
- else if (start >= dma->iova + dma->size)
+ else if (start > dma->iova + dma->size - 1)
node = node->rb_right;
else
return dma;
@@ -183,16 +185,19 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
}
static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu,
- dma_addr_t start, size_t size)
+ dma_addr_t start,
+ dma_addr_t end)
{
struct rb_node *res = NULL;
struct rb_node *node = iommu->dma_list.rb_node;
struct vfio_dma *dma_res = NULL;
+ WARN_ON(end < start);
+
while (node) {
struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
- if (start < dma->iova + dma->size) {
+ if (start <= dma->iova + dma->size - 1) {
res = node;
dma_res = dma;
if (start >= dma->iova)
@@ -202,7 +207,7 @@ static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu,
node = node->rb_right;
}
}
- if (res && size && dma_res->iova >= start + size)
+ if (res && dma_res->iova > end)
res = NULL;
return res;
}
@@ -212,11 +217,13 @@ static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
struct vfio_dma *dma;
+ WARN_ON(new->size != 0);
+
while (*link) {
parent = *link;
dma = rb_entry(parent, struct vfio_dma, node);
- if (new->iova + new->size <= dma->iova)
+ if (new->iova <= dma->iova)
link = &(*link)->rb_left;
else
link = &(*link)->rb_right;
@@ -1141,12 +1148,12 @@ static size_t unmap_unpin_slow(struct vfio_domain *domain,
static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
bool do_accounting)
{
- dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
struct vfio_domain *domain, *d;
LIST_HEAD(unmapped_region_list);
struct iommu_iotlb_gather iotlb_gather;
int unmapped_region_cnt = 0;
long unlocked = 0;
+ size_t pos = 0;
if (!dma->size)
return 0;
@@ -1170,13 +1177,14 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
}
iommu_iotlb_gather_init(&iotlb_gather);
- while (iova < end) {
+ while (pos < dma->size) {
size_t unmapped, len;
phys_addr_t phys, next;
+ dma_addr_t iova = dma->iova + pos;
phys = iommu_iova_to_phys(domain->domain, iova);
if (WARN_ON(!phys)) {
- iova += PAGE_SIZE;
+ pos += PAGE_SIZE;
continue;
}
@@ -1185,7 +1193,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
* may require hardware cache flushing, try to find the
* largest contiguous physical memory chunk to unmap.
*/
- for (len = PAGE_SIZE; iova + len < end; len += PAGE_SIZE) {
+ for (len = PAGE_SIZE; pos + len < dma->size; len += PAGE_SIZE) {
next = iommu_iova_to_phys(domain->domain, iova + len);
if (next != phys + len)
break;
@@ -1206,7 +1214,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
break;
}
- iova += unmapped;
+ pos += unmapped;
}
dma->iommu_mapped = false;
@@ -1298,7 +1306,7 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
}
static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
- dma_addr_t iova, size_t size, size_t pgsize)
+ dma_addr_t iova, dma_addr_t iova_end, size_t pgsize)
{
struct vfio_dma *dma;
struct rb_node *n;
@@ -1315,8 +1323,8 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
if (dma && dma->iova != iova)
return -EINVAL;
- dma = vfio_find_dma(iommu, iova + size - 1, 0);
- if (dma && dma->iova + dma->size != iova + size)
+ dma = vfio_find_dma(iommu, iova_end, 1);
+ if (dma && dma->iova + dma->size - 1 != iova_end)
return -EINVAL;
for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
@@ -1325,7 +1333,7 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
if (dma->iova < iova)
continue;
- if (dma->iova > iova + size - 1)
+ if (dma->iova > iova_end)
break;
ret = update_user_bitmap(bitmap, iommu, dma, iova, pgsize);
@@ -1418,7 +1426,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
if (unmap_all) {
if (iova || size)
goto unlock;
- size = SIZE_MAX;
+ iova_end = ~(dma_addr_t)0;
} else {
if (!size || size & (pgsize - 1))
goto unlock;
@@ -1473,17 +1481,17 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
if (dma && dma->iova != iova)
goto unlock;
- dma = vfio_find_dma(iommu, iova_end, 0);
- if (dma && dma->iova + dma->size != iova + size)
+ dma = vfio_find_dma(iommu, iova_end, 1);
+ if (dma && dma->iova + dma->size - 1 != iova_end)
goto unlock;
}
ret = 0;
- n = first_n = vfio_find_dma_first_node(iommu, iova, size);
+ n = first_n = vfio_find_dma_first_node(iommu, iova, iova_end);
while (n) {
dma = rb_entry(n, struct vfio_dma, node);
- if (dma->iova >= iova + size)
+ if (dma->iova > iova_end)
break;
if (!iommu->v2 && iova > dma->iova)
@@ -1813,12 +1821,12 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
for (; n; n = rb_next(n)) {
struct vfio_dma *dma;
- dma_addr_t iova;
+ size_t pos = 0;
dma = rb_entry(n, struct vfio_dma, node);
- iova = dma->iova;
- while (iova < dma->iova + dma->size) {
+ while (pos < dma->size) {
+ dma_addr_t iova = dma->iova + pos;
phys_addr_t phys;
size_t size;
@@ -1834,14 +1842,14 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
phys = iommu_iova_to_phys(d->domain, iova);
if (WARN_ON(!phys)) {
- iova += PAGE_SIZE;
+ pos += PAGE_SIZE;
continue;
}
size = PAGE_SIZE;
p = phys + size;
i = iova + size;
- while (i < dma->iova + dma->size &&
+ while (pos + size < dma->size &&
p == iommu_iova_to_phys(d->domain, i)) {
size += PAGE_SIZE;
p += PAGE_SIZE;
@@ -1849,9 +1857,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
}
} else {
unsigned long pfn;
- unsigned long vaddr = dma->vaddr +
- (iova - dma->iova);
- size_t n = dma->iova + dma->size - iova;
+ unsigned long vaddr = dma->vaddr + pos;
+ size_t n = dma->size - pos;
long npage;
npage = vfio_pin_pages_remote(dma, vaddr,
@@ -1882,7 +1889,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
goto unwind;
}
- iova += size;
+ pos += size;
}
}
@@ -1899,29 +1906,29 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
unwind:
for (; n; n = rb_prev(n)) {
struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
- dma_addr_t iova;
+ size_t pos = 0;
if (dma->iommu_mapped) {
iommu_unmap(domain->domain, dma->iova, dma->size);
continue;
}
- iova = dma->iova;
- while (iova < dma->iova + dma->size) {
+ while (pos < dma->size) {
+ dma_addr_t iova = dma->iova + pos;
phys_addr_t phys, p;
size_t size;
dma_addr_t i;
phys = iommu_iova_to_phys(domain->domain, iova);
if (!phys) {
- iova += PAGE_SIZE;
+ pos += PAGE_SIZE;
continue;
}
size = PAGE_SIZE;
p = phys + size;
i = iova + size;
- while (i < dma->iova + dma->size &&
+ while (pos + size < dma->size &&
p == iommu_iova_to_phys(domain->domain, i)) {
size += PAGE_SIZE;
p += PAGE_SIZE;
@@ -3059,7 +3066,7 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
if (iommu->dirty_page_tracking)
ret = vfio_iova_dirty_bitmap(range.bitmap.data,
- iommu, iova, size,
+ iommu, iova, iova_end,
range.bitmap.pgsize);
else
ret = -EINVAL;
--
2.47.3
next prev parent reply other threads:[~2025-10-27 17:34 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-27 17:33 [PATCH v5 0/5] vfio: handle DMA map/unmap up to the addressable limit Alex Mastro
2025-10-27 17:33 ` [PATCH v5 1/5] vfio/type1: sanitize for overflow using check_*_overflow Alex Mastro
2025-10-27 17:33 ` [PATCH v5 2/5] vfio/type1: move iova increment to unmap_unpin_* caller Alex Mastro
2025-10-27 17:33 ` Alex Mastro [this message]
2025-10-27 17:33 ` [PATCH v5 4/5] vfio: selftests: update DMA map/unmap helpers to support more test kinds Alex Mastro
2025-10-27 23:37 ` David Matlack
2025-10-27 23:58 ` Alex Mastro
2025-10-27 17:33 ` [PATCH v5 5/5] vfio: selftests: add end of address space DMA map/unmap tests Alex Mastro
2025-10-27 23:45 ` David Matlack
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251027-fix-unmap-v5-3-4f0fcf8ffb7d@fb.com \
--to=amastro@fb.com \
--cc=alejandro.j.jimenez@oracle.com \
--cc=alex@shazbot.org \
--cc=dmatlack@google.com \
--cc=jgg@ziepe.ca \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.