* [PATCH v3 1/3] iommu/io-pgtable-arm: Remove split on unmap behavior
2024-11-05 18:14 [PATCH v3 0/3] Remove split on unmap behavior Jason Gunthorpe
@ 2024-11-05 18:14 ` Jason Gunthorpe
2024-11-05 18:14 ` [PATCH v3 2/3] iommu/io-pgtable-arm-v7s: " Jason Gunthorpe
` (2 subsequent siblings)
3 siblings, 0 replies; 8+ messages in thread
From: Jason Gunthorpe @ 2024-11-05 18:14 UTC (permalink / raw)
To: iommu, Joerg Roedel, linux-arm-kernel, Robin Murphy, Will Deacon
Cc: Boris Brezillon, dri-devel, Liviu Dudau, patches, Steven Price
A minority of page table implementations (arm_lpae, armv7) are unique in
how they handle partial unmap of large IOPTEs.
Other implementations will unmap the large IOPTE and return its
length. For example if a 2M IOPTE is present and the first 4K is requested
to be unmapped then unmap will remove the whole 2M and report 2M as the
result.
arm_lpae instead replaces the IOPTE with a table of smaller IOPTEs, unmaps
the 4K and returns 4K. This is actually an illegal/non-hitless operation
on at least SMMUv3 because of the BBM level 0 rules.
Will says this was done to support VFIO, but upon deeper analysis this was
never strictly necessary:
https://lore.kernel.org/all/20241024134411.GA6956@nvidia.com/
In summary, historical VFIO supported the AMD behavior of unmapping the
whole large IOPTE and returning the size, even if asked to unmap a
portion. The driver would see this as a request to split a large IOPTE.
Modern VFIO always unmaps entire large IOPTEs (except on AMD) and drivers
don't see an IOPTE split.
Given that it doesn't work fully correctly on SMMUv3, and that relying on
ARM-unique behavior would create portability problems across IOMMU drivers,
retire this functionality.
Outside the iommu users, this will potentially affect io_pgtable users of
ARM_32_LPAE_S1, ARM_32_LPAE_S2, ARM_64_LPAE_S1, ARM_64_LPAE_S2, and
ARM_MALI_LPAE formats.
Cc: Boris Brezillon <boris.brezillon@collabora.com>
Cc: Steven Price <steven.price@arm.com>
Cc: Liviu Dudau <liviu.dudau@arm.com>
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/io-pgtable-arm.c | 81 +---------------------------------
1 file changed, 2 insertions(+), 79 deletions(-)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 0e67f1721a3d98..4e7689f5f12b4c 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -569,66 +569,6 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
kfree(data);
}
-static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
- struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t size,
- arm_lpae_iopte blk_pte, int lvl,
- arm_lpae_iopte *ptep, size_t pgcount)
-{
- struct io_pgtable_cfg *cfg = &data->iop.cfg;
- arm_lpae_iopte pte, *tablep;
- phys_addr_t blk_paddr;
- size_t tablesz = ARM_LPAE_GRANULE(data);
- size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
- int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
- int i, unmap_idx_start = -1, num_entries = 0, max_entries;
-
- if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
- return 0;
-
- tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg, data->iop.cookie);
- if (!tablep)
- return 0; /* Bytes unmapped */
-
- if (size == split_sz) {
- unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
- max_entries = ptes_per_table - unmap_idx_start;
- num_entries = min_t(int, pgcount, max_entries);
- }
-
- blk_paddr = iopte_to_paddr(blk_pte, data);
- pte = iopte_prot(blk_pte);
-
- for (i = 0; i < ptes_per_table; i++, blk_paddr += split_sz) {
- /* Unmap! */
- if (i >= unmap_idx_start && i < (unmap_idx_start + num_entries))
- continue;
-
- __arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
- }
-
- pte = arm_lpae_install_table(tablep, ptep, blk_pte, data);
- if (pte != blk_pte) {
- __arm_lpae_free_pages(tablep, tablesz, cfg, data->iop.cookie);
- /*
- * We may race against someone unmapping another part of this
- * block, but anything else is invalid. We can't misinterpret
- * a page entry here since we're never at the last level.
- */
- if (iopte_type(pte) != ARM_LPAE_PTE_TYPE_TABLE)
- return 0;
-
- tablep = iopte_deref(pte, data);
- } else if (unmap_idx_start >= 0) {
- for (i = 0; i < num_entries; i++)
- io_pgtable_tlb_add_page(&data->iop, gather, iova + i * size, size);
-
- return num_entries * size;
- }
-
- return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep);
-}
-
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size, size_t pgcount,
@@ -678,12 +618,8 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
return i * size;
} else if (iopte_leaf(pte, lvl, iop->fmt)) {
- /*
- * Insert a table at the next level to map the old region,
- * minus the part we want to unmap
- */
- return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
- lvl + 1, ptep, pgcount);
+ WARN_ONCE(true, "Unmap of a partial large IOPTE is not allowed");
+ return 0;
}
/* Keep on walkin' */
@@ -1347,19 +1283,6 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
iova += SZ_1G;
}
- /* Partial unmap */
- size = 1UL << __ffs(cfg->pgsize_bitmap);
- if (ops->unmap_pages(ops, SZ_1G + size, size, 1, NULL) != size)
- return __FAIL(ops, i);
-
- /* Remap of partial unmap */
- if (ops->map_pages(ops, SZ_1G + size, size, size, 1,
- IOMMU_READ, GFP_KERNEL, &mapped))
- return __FAIL(ops, i);
-
- if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42))
- return __FAIL(ops, i);
-
/* Full unmap */
iova = 0;
for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
--
2.43.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH v3 2/3] iommu/io-pgtable-arm-v7s: Remove split on unmap behavior
2024-11-05 18:14 [PATCH v3 0/3] Remove split on unmap behavior Jason Gunthorpe
2024-11-05 18:14 ` [PATCH v3 1/3] iommu/io-pgtable-arm: " Jason Gunthorpe
@ 2024-11-05 18:14 ` Jason Gunthorpe
2024-11-06 8:43 ` kernel test robot
2024-11-05 18:14 ` [PATCH v3 3/3] iommu: Add a kdoc to iommu_unmap() Jason Gunthorpe
2024-11-06 15:53 ` [PATCH v3 0/3] Remove split on unmap behavior Will Deacon
3 siblings, 1 reply; 8+ messages in thread
From: Jason Gunthorpe @ 2024-11-05 18:14 UTC (permalink / raw)
To: iommu, Joerg Roedel, linux-arm-kernel, Robin Murphy, Will Deacon
Cc: Boris Brezillon, dri-devel, Liviu Dudau, patches, Steven Price
A minority of page table implementations (arm_lpae, armv7) are unique in
how they handle partial unmap of large IOPTEs.
Other implementations will unmap the large IOPTE and return its
length. For example if a 2M IOPTE is present and the first 4K is requested
to be unmapped then unmap will remove the whole 2M and report 2M as the
result.
armv7 instead will break up contiguous entries and replace an entry with a
whole table so it can unmap the requested 4K.
This seems copied from the arm_lpae implementation, which was analyzed
here:
https://lore.kernel.org/all/20241024134411.GA6956@nvidia.com/
Bring consistency to the implementations and remove this unused
functionality.
There are no uses outside iommu; this affects the ARM_V7S drivers
msm_iommu, mtk_iommu, and arm-smmu.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/io-pgtable-arm-v7s.c | 146 +----------------------------
1 file changed, 5 insertions(+), 141 deletions(-)
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 06ffc683b28fee..9ae8cb8999ef51 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -166,7 +166,6 @@ struct arm_v7s_io_pgtable {
arm_v7s_iopte *pgd;
struct kmem_cache *l2_tables;
- spinlock_t split_lock;
};
static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl);
@@ -363,25 +362,6 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
return pte;
}
-static int arm_v7s_pte_to_prot(arm_v7s_iopte pte, int lvl)
-{
- int prot = IOMMU_READ;
- arm_v7s_iopte attr = pte >> ARM_V7S_ATTR_SHIFT(lvl);
-
- if (!(attr & ARM_V7S_PTE_AP_RDONLY))
- prot |= IOMMU_WRITE;
- if (!(attr & ARM_V7S_PTE_AP_UNPRIV))
- prot |= IOMMU_PRIV;
- if ((attr & (ARM_V7S_TEX_MASK << ARM_V7S_TEX_SHIFT)) == 0)
- prot |= IOMMU_MMIO;
- else if (pte & ARM_V7S_ATTR_C)
- prot |= IOMMU_CACHE;
- if (pte & ARM_V7S_ATTR_XN(lvl))
- prot |= IOMMU_NOEXEC;
-
- return prot;
-}
-
static arm_v7s_iopte arm_v7s_pte_to_cont(arm_v7s_iopte pte, int lvl)
{
if (lvl == 1) {
@@ -398,23 +378,6 @@ static arm_v7s_iopte arm_v7s_pte_to_cont(arm_v7s_iopte pte, int lvl)
return pte;
}
-static arm_v7s_iopte arm_v7s_cont_to_pte(arm_v7s_iopte pte, int lvl)
-{
- if (lvl == 1) {
- pte &= ~ARM_V7S_CONT_SECTION;
- } else if (lvl == 2) {
- arm_v7s_iopte xn = pte & BIT(ARM_V7S_CONT_PAGE_XN_SHIFT);
- arm_v7s_iopte tex = pte & (ARM_V7S_CONT_PAGE_TEX_MASK <<
- ARM_V7S_CONT_PAGE_TEX_SHIFT);
-
- pte ^= xn | tex | ARM_V7S_PTE_TYPE_CONT_PAGE;
- pte |= (xn >> ARM_V7S_CONT_PAGE_XN_SHIFT) |
- (tex >> ARM_V7S_CONT_PAGE_TEX_SHIFT) |
- ARM_V7S_PTE_TYPE_PAGE;
- }
- return pte;
-}
-
static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl)
{
if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte, lvl))
@@ -591,77 +554,6 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop)
kfree(data);
}
-static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
- unsigned long iova, int idx, int lvl,
- arm_v7s_iopte *ptep)
-{
- struct io_pgtable *iop = &data->iop;
- arm_v7s_iopte pte;
- size_t size = ARM_V7S_BLOCK_SIZE(lvl);
- int i;
-
- /* Check that we didn't lose a race to get the lock */
- pte = *ptep;
- if (!arm_v7s_pte_is_cont(pte, lvl))
- return pte;
-
- ptep -= idx & (ARM_V7S_CONT_PAGES - 1);
- pte = arm_v7s_cont_to_pte(pte, lvl);
- for (i = 0; i < ARM_V7S_CONT_PAGES; i++)
- ptep[i] = pte + i * size;
-
- __arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg);
-
- size *= ARM_V7S_CONT_PAGES;
- io_pgtable_tlb_flush_walk(iop, iova, size, size);
- return pte;
-}
-
-static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
- struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t size,
- arm_v7s_iopte blk_pte,
- arm_v7s_iopte *ptep)
-{
- struct io_pgtable_cfg *cfg = &data->iop.cfg;
- arm_v7s_iopte pte, *tablep;
- int i, unmap_idx, num_entries, num_ptes;
-
- tablep = __arm_v7s_alloc_table(2, GFP_ATOMIC, data);
- if (!tablep)
- return 0; /* Bytes unmapped */
-
- num_ptes = ARM_V7S_PTES_PER_LVL(2, cfg);
- num_entries = size >> ARM_V7S_LVL_SHIFT(2);
- unmap_idx = ARM_V7S_LVL_IDX(iova, 2, cfg);
-
- pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg);
- if (num_entries > 1)
- pte = arm_v7s_pte_to_cont(pte, 2);
-
- for (i = 0; i < num_ptes; i += num_entries, pte += size) {
- /* Unmap! */
- if (i == unmap_idx)
- continue;
-
- __arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg);
- }
-
- pte = arm_v7s_install_table(tablep, ptep, blk_pte, cfg);
- if (pte != blk_pte) {
- __arm_v7s_free_table(tablep, 2, data);
-
- if (!ARM_V7S_PTE_IS_TABLE(pte, 1))
- return 0;
-
- tablep = iopte_deref(pte, 1, data);
- return __arm_v7s_unmap(data, gather, iova, size, 2, tablep);
- }
-
- io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
- return size;
-}
-
static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size, int lvl,
@@ -694,11 +586,8 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
* case in a lock for the sake of correctness and be done with it.
*/
if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) {
- unsigned long flags;
-
- spin_lock_irqsave(&data->split_lock, flags);
- pte[0] = arm_v7s_split_cont(data, iova, idx, lvl, ptep);
- spin_unlock_irqrestore(&data->split_lock, flags);
+ WARN_ONCE(true, "Unmap of a partial large IOPTE is not allowed");
+ return 0;
}
/* If the size matches this level, we're in the right place */
@@ -721,12 +610,8 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
}
return size;
} else if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte[0], lvl)) {
- /*
- * Insert a table at the next level to map the old region,
- * minus the part we want to unmap
- */
- return arm_v7s_split_blk_unmap(data, gather, iova, size, pte[0],
- ptep);
+ WARN_ONCE(true, "Unmap of a partial large IOPTE is not allowed");
+ return 0;
}
/* Keep on walkin' */
@@ -811,8 +696,6 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
if (!data)
return NULL;
- spin_lock_init(&data->split_lock);
-
/*
* ARM_MTK_TTBR_EXT extend the translation table base support larger
* memory address.
@@ -936,7 +819,7 @@ static int __init arm_v7s_do_selftests(void)
.quirks = IO_PGTABLE_QUIRK_ARM_NS,
.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
};
- unsigned int iova, size, iova_start;
+ unsigned int iova, size;
unsigned int i, loopnr = 0;
size_t mapped;
@@ -988,25 +871,6 @@ static int __init arm_v7s_do_selftests(void)
loopnr++;
}
- /* Partial unmap */
- i = 1;
- size = 1UL << __ffs(cfg.pgsize_bitmap);
- while (i < loopnr) {
- iova_start = i * SZ_16M;
- if (ops->unmap_pages(ops, iova_start + size, size, 1, NULL) != size)
- return __FAIL(ops);
-
- /* Remap of partial unmap */
- if (ops->map_pages(ops, iova_start + size, size, size, 1,
- IOMMU_READ, GFP_KERNEL, &mapped))
- return __FAIL(ops);
-
- if (ops->iova_to_phys(ops, iova_start + size + 42)
- != (size + 42))
- return __FAIL(ops);
- i++;
- }
-
/* Full unmap */
iova = 0;
for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
--
2.43.0
^ permalink raw reply related [flat|nested] 8+ messages in thread