* [PATCH 1/5] iommu/io-pgtable-arm: Allow appropriate DMA API use
@ 2015-07-27 18:18 Robin Murphy
[not found] ` <c8cc82fea26044e393ff857014d4146faac5ec1e.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Robin Murphy @ 2015-07-27 18:18 UTC (permalink / raw)
To: will.deacon-5wv7dgnIgG8,
laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw
Cc: linux-lFZ/pmaqli7XmaaqVzeoHQ,
iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
Currently, users of the LPAE page table code are (ab)using dma_map_page()
as a means to flush page table updates for non-coherent IOMMUs. Since
from the CPU's point of view, creating IOMMU page tables *is* passing
DMA buffers to a device (the IOMMU's page table walker), there's little
reason not to use the DMA API correctly.
Allow drivers to opt into appropriate DMA operations for page table
allocation and updates by providing the relevant device, and make the
flush_pgtable() callback optional in case those DMA API operations are
sufficient. The expectation is that an LPAE IOMMU should have a full view
of system memory, so use streaming mappings to avoid unnecessary pressure
on ZONE_DMA, and treat any DMA translation as a warning sign.
Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org>
---
Hi all,
Since Russell fixing Tegra[1] reminded me, I dug this out from, er,
rather a long time ago[2] and tidied it up. I've tested the SMMUv2
version with the MMU-401s on Juno (both coherent and non-coherent)
with no visible regressions; I have the same hope for the SMMUv3 and
IPMMU changes since they should be semantically identical. At worst
the Renesas driver might need a larger DMA mask setting as per
f1d84548694f, but given that there shouldn't be any highmem involved
I'd think it should be OK as-is.
Robin.
[1]:http://article.gmane.org/gmane.linux.ports.tegra/23150
[2]:http://article.gmane.org/gmane.linux.kernel.iommu/8972
drivers/iommu/io-pgtable-arm.c | 107 +++++++++++++++++++++++++++++++----------
drivers/iommu/io-pgtable.h | 2 +
2 files changed, 84 insertions(+), 25 deletions(-)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 4e46021..b93a60e 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -200,12 +200,76 @@ typedef u64 arm_lpae_iopte;
static bool selftest_running = false;
+static dma_addr_t __arm_lpae_dma(struct device *dev, void *pages)
+{
+ return phys_to_dma(dev, virt_to_phys(pages));
+}
+
+static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
+ struct io_pgtable_cfg *cfg, void *cookie)
+{
+ void *pages = alloc_pages_exact(size, gfp | __GFP_ZERO);
+ struct device *dev = cfg->iommu_dev;
+ dma_addr_t dma;
+
+ if (!pages)
+ return NULL;
+ if (dev) {
+ dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma))
+ goto out_free;
+ /*
+ * We depend on the IOMMU being able to work with any physical
+ * address directly, so if the DMA layer suggests it can't by
+ * giving us back some translation, that bodes very badly...
+ */
+ if (WARN(dma != __arm_lpae_dma(dev, pages),
+ "Cannot accommodate DMA translation for IOMMU page tables\n"))
+ goto out_unmap;
+ }
+ if (cfg->tlb->flush_pgtable)
+ cfg->tlb->flush_pgtable(pages, size, cookie);
+ return pages;
+
+out_unmap:
+ dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
+out_free:
+ free_pages_exact(pages, size);
+ return NULL;
+}
+
+static void __arm_lpae_free_pages(void *pages, size_t size,
+ struct io_pgtable_cfg *cfg)
+{
+ struct device *dev = cfg->iommu_dev;
+
+ if (dev)
+ dma_unmap_single(dev, __arm_lpae_dma(dev, pages),
+ size, DMA_TO_DEVICE);
+ free_pages_exact(pages, size);
+}
+
+static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
+ struct io_pgtable_cfg *cfg, void *cookie)
+{
+ struct device *dev = cfg->iommu_dev;
+
+ *ptep = pte;
+
+ if (dev)
+ dma_sync_single_for_device(dev, __arm_lpae_dma(dev, ptep),
+ sizeof(pte), DMA_TO_DEVICE);
+ if (cfg->tlb->flush_pgtable)
+ cfg->tlb->flush_pgtable(ptep, sizeof(pte), cookie);
+}
+
static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
unsigned long iova, phys_addr_t paddr,
arm_lpae_iopte prot, int lvl,
arm_lpae_iopte *ptep)
{
arm_lpae_iopte pte = prot;
+ struct io_pgtable_cfg *cfg = &data->iop.cfg;
/* We require an unmap first */
if (iopte_leaf(*ptep, lvl)) {
@@ -213,7 +277,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
return -EEXIST;
}
- if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
+ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
pte |= ARM_LPAE_PTE_NS;
if (lvl == ARM_LPAE_MAX_LEVELS - 1)
@@ -224,8 +288,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
- *ptep = pte;
- data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), data->iop.cookie);
+ __arm_lpae_set_pte(ptep, pte, cfg, data->iop.cookie);
return 0;
}
@@ -236,12 +299,13 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
arm_lpae_iopte *cptep, pte;
void *cookie = data->iop.cookie;
size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
+ struct io_pgtable_cfg *cfg = &data->iop.cfg;
/* Find our entry at the current level */
ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
/* If we can install a leaf entry at this level, then do so */
- if (size == block_size && (size & data->iop.cfg.pgsize_bitmap))
+ if (size == block_size && (size & cfg->pgsize_bitmap))
return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);
/* We can't allocate tables at the final level */
@@ -251,18 +315,15 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
/* Grab a pointer to the next level */
pte = *ptep;
if (!pte) {
- cptep = alloc_pages_exact(1UL << data->pg_shift,
- GFP_ATOMIC | __GFP_ZERO);
+ cptep = __arm_lpae_alloc_pages(1UL << data->pg_shift,
+ GFP_ATOMIC, cfg, cookie);
if (!cptep)
return -ENOMEM;
- data->iop.cfg.tlb->flush_pgtable(cptep, 1UL << data->pg_shift,
- cookie);
pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE;
- if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
+ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
pte |= ARM_LPAE_PTE_NSTABLE;
- *ptep = pte;
- data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), cookie);
+ __arm_lpae_set_pte(ptep, pte, cfg, cookie);
} else {
cptep = iopte_deref(pte, data);
}
@@ -347,7 +408,7 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
}
- free_pages_exact(start, table_size);
+ __arm_lpae_free_pages(start, table_size, &data->iop.cfg);
}
static void arm_lpae_free_pgtable(struct io_pgtable *iop)
@@ -366,8 +427,8 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
unsigned long blk_start, blk_end;
phys_addr_t blk_paddr;
arm_lpae_iopte table = 0;
+ struct io_pgtable_cfg *cfg = &data->iop.cfg;
void *cookie = data->iop.cookie;
- const struct iommu_gather_ops *tlb = data->iop.cfg.tlb;
blk_start = iova & ~(blk_size - 1);
blk_end = blk_start + blk_size;
@@ -393,10 +454,9 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
}
}
- *ptep = table;
- tlb->flush_pgtable(ptep, sizeof(*ptep), cookie);
+ __arm_lpae_set_pte(ptep, table, cfg, cookie);
iova &= ~(blk_size - 1);
- tlb->tlb_add_flush(iova, blk_size, true, cookie);
+ cfg->tlb->tlb_add_flush(iova, blk_size, true, cookie);
return size;
}
@@ -418,13 +478,12 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
/* If the size matches this level, we're in the right place */
if (size == blk_size) {
- *ptep = 0;
- tlb->flush_pgtable(ptep, sizeof(*ptep), cookie);
+ __arm_lpae_set_pte(ptep, 0, &data->iop.cfg, cookie);
if (!iopte_leaf(pte, lvl)) {
/* Also flush any partial walks */
tlb->tlb_add_flush(iova, size, false, cookie);
- tlb->tlb_sync(data->iop.cookie);
+ tlb->tlb_sync(cookie);
ptep = iopte_deref(pte, data);
__arm_lpae_free_pgtable(data, lvl + 1, ptep);
} else {
@@ -640,12 +699,11 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
cfg->arm_lpae_s1_cfg.mair[1] = 0;
/* Looking good; allocate a pgd */
- data->pgd = alloc_pages_exact(data->pgd_size, GFP_KERNEL | __GFP_ZERO);
+ data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL,
+ cfg, cookie);
if (!data->pgd)
goto out_free_data;
- cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie);
-
/* TTBRs */
cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd);
cfg->arm_lpae_s1_cfg.ttbr[1] = 0;
@@ -728,12 +786,11 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
cfg->arm_lpae_s2_cfg.vtcr = reg;
/* Allocate pgd pages */
- data->pgd = alloc_pages_exact(data->pgd_size, GFP_KERNEL | __GFP_ZERO);
+ data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL,
+ cfg, cookie);
if (!data->pgd)
goto out_free_data;
- cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie);
-
/* VTTBR */
cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd);
return &data->iop;
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index 10e32f6..39fe864 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -41,6 +41,7 @@ struct iommu_gather_ops {
* @ias: Input address (iova) size, in bits.
* @oas: Output address (paddr) size, in bits.
* @tlb: TLB management callbacks for this set of tables.
+ * @iommu_dev: The owner of the page table memory (for DMA purposes).
*/
struct io_pgtable_cfg {
#define IO_PGTABLE_QUIRK_ARM_NS (1 << 0) /* Set NS bit in PTEs */
@@ -49,6 +50,7 @@ struct io_pgtable_cfg {
unsigned int ias;
unsigned int oas;
const struct iommu_gather_ops *tlb;
+ struct device *iommu_dev;
/* Low-level data specific to the table format */
union {
--
1.9.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
[parent not found: <c8cc82fea26044e393ff857014d4146faac5ec1e.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org>]
* [PATCH 2/5] iommu/arm-smmu: Sort out coherency [not found] ` <c8cc82fea26044e393ff857014d4146faac5ec1e.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org> @ 2015-07-27 18:18 ` Robin Murphy [not found] ` <4d5880f2131854bc59107ccc917993136e511341.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org> 2015-07-27 18:18 ` [PATCH 3/5] iommu/arm-smmu: Clean up DMA API usage Robin Murphy ` (3 subsequent siblings) 4 siblings, 1 reply; 13+ messages in thread From: Robin Murphy @ 2015-07-27 18:18 UTC (permalink / raw) To: will.deacon-5wv7dgnIgG8, laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw Cc: linux-lFZ/pmaqli7XmaaqVzeoHQ, iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA Currently we detect the whether the SMMU has coherent page table walk capability from the IDR0.CTTW field, and base our cache maintenance decisions on that. In preparation for fixing the bogus DMA API usage, however, we need to ensure that the DMA API agrees about this, which necessitates deferring to the dma-coherent property in the device tree for the final say. As an added bonus, since systems exist where an external CTTW signal has been tied off incorrectly at integration, allowing DT to override it offers a neat workaround for coherency issues with such SMMUs. Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org> --- Documentation/devicetree/bindings/iommu/arm,smmu.txt | 4 ++++ drivers/iommu/arm-smmu.c | 17 ++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index 0676050..04724fc 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -43,6 +43,10 @@ conditions. ** System MMU optional properties: +- dma-coherent : Presence or absence should correctly reflect whether + the SMMU translation table walk interface is + cache-coherent with the CPU. 
+ - calxeda,smmu-secure-config-access : Enable proper handling of buggy implementations that always use secure access to SMMU configuration registers. In this case non-secure diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 4cd0c29..9a59bef 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1532,6 +1532,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) unsigned long size; void __iomem *gr0_base = ARM_SMMU_GR0(smmu); u32 id; + bool cttw_dt, cttw_reg; dev_notice(smmu->dev, "probing hardware configuration...\n"); dev_notice(smmu->dev, "SMMUv%d with:\n", smmu->version); @@ -1571,10 +1572,20 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) dev_notice(smmu->dev, "\taddress translation ops\n"); } - if (id & ID0_CTTW) { + /* + * In order for DMA API calls to work properly, we must defer to what + * the DT says about coherency, regardless of what the hardware claims. + * Fortunately, this also opens up a workaround for systems where the + * ID register value has ended up configured incorrectly. + */ + cttw_dt = of_property_read_bool(smmu->dev->of_node, "dma-coherent"); + if (cttw_dt) smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK; - dev_notice(smmu->dev, "\tcoherent table walk\n"); - } + cttw_reg = !!(id & ID0_CTTW); + if (cttw_dt || cttw_reg) + dev_notice(smmu->dev, "\t%scoherent table walk%s\n", + cttw_dt ? "" : "no ", + (cttw_dt == cttw_reg) ? "" : " (overridden by DT)"); if (id & ID0_SMS) { u32 smr, sid, mask; -- 1.9.1 ^ permalink raw reply related [flat|nested] 13+ messages in thread
[parent not found: <4d5880f2131854bc59107ccc917993136e511341.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org>]
* Re: [PATCH 2/5] iommu/arm-smmu: Sort out coherency [not found] ` <4d5880f2131854bc59107ccc917993136e511341.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org> @ 2015-07-28 9:43 ` Will Deacon [not found] ` <20150728094349.GE29209-5wv7dgnIgG8@public.gmane.org> 0 siblings, 1 reply; 13+ messages in thread From: Will Deacon @ 2015-07-28 9:43 UTC (permalink / raw) To: Robin Murphy Cc: laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw@public.gmane.org, linux-lFZ/pmaqli7XmaaqVzeoHQ@public.gmane.org, iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org Hi Robin, Cheers for doing this. Just a few comments (mainly in relation to being consistent with SMMUv3). On Mon, Jul 27, 2015 at 07:18:09PM +0100, Robin Murphy wrote: > Currently we detect the whether the SMMU has coherent page table walk "we detect the whether"? > capability from the IDR0.CTTW field, and base our cache maintenance > decisions on that. In preparation for fixing the bogus DMA API usage, > however, we need to ensure that the DMA API agrees about this, which > necessitates deferring to the dma-coherent property in the device tree > for the final say. > > As an added bonus, since systems exist where an external CTTW signal > has been tied off incorrectly at integration, allowing DT to override > it offers a neat workaround for coherency issues with such SMMUs. > > Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org> > --- > Documentation/devicetree/bindings/iommu/arm,smmu.txt | 4 ++++ > drivers/iommu/arm-smmu.c | 17 ++++++++++++++--- > 2 files changed, 18 insertions(+), 3 deletions(-) > > diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt > index 0676050..04724fc 100644 > --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt > @@ -43,6 +43,10 @@ conditions. 
> > ** System MMU optional properties: > > +- dma-coherent : Presence or absence should correctly reflect whether > + the SMMU translation table walk interface is > + cache-coherent with the CPU. > + Please copy the text from: Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt# and remove the bit about stream table accesses. > - calxeda,smmu-secure-config-access : Enable proper handling of buggy > implementations that always use secure access to > SMMU configuration registers. In this case non-secure > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > index 4cd0c29..9a59bef 100644 > --- a/drivers/iommu/arm-smmu.c > +++ b/drivers/iommu/arm-smmu.c > @@ -1532,6 +1532,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) > unsigned long size; > void __iomem *gr0_base = ARM_SMMU_GR0(smmu); > u32 id; > + bool cttw_dt, cttw_reg; > > dev_notice(smmu->dev, "probing hardware configuration...\n"); > dev_notice(smmu->dev, "SMMUv%d with:\n", smmu->version); > @@ -1571,10 +1572,20 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) > dev_notice(smmu->dev, "\taddress translation ops\n"); > } > > - if (id & ID0_CTTW) { > + /* > + * In order for DMA API calls to work properly, we must defer to what > + * the DT says about coherency, regardless of what the hardware claims. > + * Fortunately, this also opens up a workaround for systems where the > + * ID register value has ended up configured incorrectly. > + */ > + cttw_dt = of_property_read_bool(smmu->dev->of_node, "dma-coherent"); Use of_dma_is_coherent(smmu->dev->of_node) > + if (cttw_dt) > smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK; > - dev_notice(smmu->dev, "\tcoherent table walk\n"); > - } > + cttw_reg = !!(id & ID0_CTTW); > + if (cttw_dt || cttw_reg) > + dev_notice(smmu->dev, "\t%scoherent table walk%s\n", > + cttw_dt ? "" : "no ", > + (cttw_dt == cttw_reg) ? 
"" : " (overridden by DT)"); Similarly here; I think we should just follow the message printed by the SMMUv3 driver but s/IDR0.COHACC/ID0.CTTW/. Will ^ permalink raw reply [flat|nested] 13+ messages in thread
[parent not found: <20150728094349.GE29209-5wv7dgnIgG8@public.gmane.org>]
* Re: [PATCH 2/5] iommu/arm-smmu: Sort out coherency [not found] ` <20150728094349.GE29209-5wv7dgnIgG8@public.gmane.org> @ 2015-07-28 15:17 ` Robin Murphy 0 siblings, 0 replies; 13+ messages in thread From: Robin Murphy @ 2015-07-28 15:17 UTC (permalink / raw) To: Will Deacon Cc: laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw@public.gmane.org, linux-lFZ/pmaqli7XmaaqVzeoHQ@public.gmane.org, iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org Hi Will, On 28/07/15 10:43, Will Deacon wrote: > Hi Robin, > > Cheers for doing this. Just a few comments (mainly in relation to being > consistent with SMMUv3). Thanks! > On Mon, Jul 27, 2015 at 07:18:09PM +0100, Robin Murphy wrote: >> Currently we detect the whether the SMMU has coherent page table walk > > "we detect the whether"? Er, yeah, looks like rain tomorrow afternoon :P >> capability from the IDR0.CTTW field, and base our cache maintenance >> decisions on that. In preparation for fixing the bogus DMA API usage, >> however, we need to ensure that the DMA API agrees about this, which >> necessitates deferring to the dma-coherent property in the device tree >> for the final say. >> >> As an added bonus, since systems exist where an external CTTW signal >> has been tied off incorrectly at integration, allowing DT to override >> it offers a neat workaround for coherency issues with such SMMUs. >> >> Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org> >> --- >> Documentation/devicetree/bindings/iommu/arm,smmu.txt | 4 ++++ >> drivers/iommu/arm-smmu.c | 17 ++++++++++++++--- >> 2 files changed, 18 insertions(+), 3 deletions(-) >> >> diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt >> index 0676050..04724fc 100644 >> --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt >> +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt >> @@ -43,6 +43,10 @@ conditions. 
>> >> ** System MMU optional properties: >> >> +- dma-coherent : Presence or absence should correctly reflect whether >> + the SMMU translation table walk interface is >> + cache-coherent with the CPU. >> + > > Please copy the text from: > Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt# > and remove the bit about stream table accesses. Done, although since the resulting "...DMA operations made by the SMMU (page table walks)..." is both redundant and horribly clunky it gets rewritten as "...page table walks made by the SMMU..." >> - calxeda,smmu-secure-config-access : Enable proper handling of buggy >> implementations that always use secure access to >> SMMU configuration registers. In this case non-secure >> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c >> index 4cd0c29..9a59bef 100644 >> --- a/drivers/iommu/arm-smmu.c >> +++ b/drivers/iommu/arm-smmu.c >> @@ -1532,6 +1532,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) >> unsigned long size; >> void __iomem *gr0_base = ARM_SMMU_GR0(smmu); >> u32 id; >> + bool cttw_dt, cttw_reg; >> >> dev_notice(smmu->dev, "probing hardware configuration...\n"); >> dev_notice(smmu->dev, "SMMUv%d with:\n", smmu->version); >> @@ -1571,10 +1572,20 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) >> dev_notice(smmu->dev, "\taddress translation ops\n"); >> } >> >> - if (id & ID0_CTTW) { >> + /* >> + * In order for DMA API calls to work properly, we must defer to what >> + * the DT says about coherency, regardless of what the hardware claims. >> + * Fortunately, this also opens up a workaround for systems where the >> + * ID register value has ended up configured incorrectly. 
>> + */ >> + cttw_dt = of_property_read_bool(smmu->dev->of_node, "dma-coherent"); > > Use of_dma_is_coherent(smmu->dev->of_node) *facepalm* >> + if (cttw_dt) >> smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK; >> - dev_notice(smmu->dev, "\tcoherent table walk\n"); >> - } >> + cttw_reg = !!(id & ID0_CTTW); >> + if (cttw_dt || cttw_reg) >> + dev_notice(smmu->dev, "\t%scoherent table walk%s\n", >> + cttw_dt ? "" : "no ", >> + (cttw_dt == cttw_reg) ? "" : " (overridden by DT)"); > > Similarly here; I think we should just follow the message printed by the > SMMUv3 driver but s/IDR0.COHACC/ID0.CTTW/. I note that the v3 driver stays silent as long as the DT and hardware agree - I'm reticent to entirely remove the original message here as it's proved rather useful (especially in debugging-over-email exchanges) to see at a glance from a boot log whether the driver thinks a particular SMMU is coherent or not (particularly with my SMMUv1 many-instances-in-the-same-system hat on). I can certainly make the override message more consistent, though. Robin ^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 3/5] iommu/arm-smmu: Clean up DMA API usage [not found] ` <c8cc82fea26044e393ff857014d4146faac5ec1e.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org> 2015-07-27 18:18 ` [PATCH 2/5] iommu/arm-smmu: Sort out coherency Robin Murphy @ 2015-07-27 18:18 ` Robin Murphy [not found] ` <921980a38ec7d35610c68e8e94235c55318ba80c.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org> 2015-07-27 18:18 ` [PATCH 4/5] iommu/arm-smmu-v3: " Robin Murphy ` (2 subsequent siblings) 4 siblings, 1 reply; 13+ messages in thread From: Robin Murphy @ 2015-07-27 18:18 UTC (permalink / raw) To: will.deacon-5wv7dgnIgG8, laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw Cc: linux-lFZ/pmaqli7XmaaqVzeoHQ, iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA With the correct DMA API calls now integrated into the io-pgtable code, let that handle the flushing of non-coherent page table updates. Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org> --- drivers/iommu/arm-smmu.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 9a59bef..9501668 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -610,24 +610,13 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; - struct arm_smmu_device *smmu = smmu_domain->smmu; - unsigned long offset = (unsigned long)addr & ~PAGE_MASK; - - /* Ensure new page tables are visible to the hardware walker */ - if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) { + /* + * Ensure new page tables are visible to a coherent hardware walker. + * The page table code deals with flushing for the non-coherent case. 
+ */ + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) dsb(ishst); - } else { - /* - * If the SMMU can't walk tables in the CPU caches, treat them - * like non-coherent DMA since we need to flush the new entries - * all the way out to memory. There's no possibility of - * recursion here as the SMMU table walker will not be wired - * through another SMMU. - */ - dma_map_page(smmu->dev, virt_to_page(addr), offset, size, - DMA_TO_DEVICE); - } } static struct iommu_gather_ops arm_smmu_gather_ops = { @@ -899,6 +888,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .oas = oas, .tlb = &arm_smmu_gather_ops, }; + if (!(smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)) + pgtbl_cfg.iommu_dev = smmu->dev; smmu_domain->smmu = smmu; pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); -- 1.9.1 ^ permalink raw reply related [flat|nested] 13+ messages in thread
[parent not found: <921980a38ec7d35610c68e8e94235c55318ba80c.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org>]
* Re: [PATCH 3/5] iommu/arm-smmu: Clean up DMA API usage [not found] ` <921980a38ec7d35610c68e8e94235c55318ba80c.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org> @ 2015-07-28 10:18 ` Will Deacon [not found] ` <20150728101810.GG29209-5wv7dgnIgG8@public.gmane.org> 0 siblings, 1 reply; 13+ messages in thread From: Will Deacon @ 2015-07-28 10:18 UTC (permalink / raw) To: Robin Murphy Cc: laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw@public.gmane.org, linux-lFZ/pmaqli7XmaaqVzeoHQ@public.gmane.org, iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org On Mon, Jul 27, 2015 at 07:18:10PM +0100, Robin Murphy wrote: > With the correct DMA API calls now integrated into the io-pgtable code, > let that handle the flushing of non-coherent page table updates. > > Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org> > --- > drivers/iommu/arm-smmu.c | 23 +++++++---------------- > 1 file changed, 7 insertions(+), 16 deletions(-) > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > index 9a59bef..9501668 100644 > --- a/drivers/iommu/arm-smmu.c > +++ b/drivers/iommu/arm-smmu.c > @@ -610,24 +610,13 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, > static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie) > { > struct arm_smmu_domain *smmu_domain = cookie; > - struct arm_smmu_device *smmu = smmu_domain->smmu; > - unsigned long offset = (unsigned long)addr & ~PAGE_MASK; > > - > - /* Ensure new page tables are visible to the hardware walker */ > - if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) { > + /* > + * Ensure new page tables are visible to a coherent hardware walker. > + * The page table code deals with flushing for the non-coherent case. 
> + */ > + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) > dsb(ishst); > - } else { > - /* > - * If the SMMU can't walk tables in the CPU caches, treat them > - * like non-coherent DMA since we need to flush the new entries > - * all the way out to memory. There's no possibility of > - * recursion here as the SMMU table walker will not be wired > - * through another SMMU. > - */ > - dma_map_page(smmu->dev, virt_to_page(addr), offset, size, > - DMA_TO_DEVICE); > - } > } > > static struct iommu_gather_ops arm_smmu_gather_ops = { > @@ -899,6 +888,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > .oas = oas, > .tlb = &arm_smmu_gather_ops, > }; > + if (!(smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)) > + pgtbl_cfg.iommu_dev = smmu->dev; Ideally, I'd like to set the dev unconditionally and kill the flush_pgtable callback. See my comments on the other patch. Will ^ permalink raw reply [flat|nested] 13+ messages in thread
[parent not found: <20150728101810.GG29209-5wv7dgnIgG8@public.gmane.org>]
* Re: [PATCH 3/5] iommu/arm-smmu: Clean up DMA API usage [not found] ` <20150728101810.GG29209-5wv7dgnIgG8@public.gmane.org> @ 2015-07-28 15:48 ` Robin Murphy [not found] ` <55B7A439.3000007-5wv7dgnIgG8@public.gmane.org> 0 siblings, 1 reply; 13+ messages in thread From: Robin Murphy @ 2015-07-28 15:48 UTC (permalink / raw) To: Will Deacon Cc: laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw@public.gmane.org, linux-lFZ/pmaqli7XmaaqVzeoHQ@public.gmane.org, iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org On 28/07/15 11:18, Will Deacon wrote: > On Mon, Jul 27, 2015 at 07:18:10PM +0100, Robin Murphy wrote: >> With the correct DMA API calls now integrated into the io-pgtable code, >> let that handle the flushing of non-coherent page table updates. >> >> Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org> >> --- >> drivers/iommu/arm-smmu.c | 23 +++++++---------------- >> 1 file changed, 7 insertions(+), 16 deletions(-) >> >> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c >> index 9a59bef..9501668 100644 >> --- a/drivers/iommu/arm-smmu.c >> +++ b/drivers/iommu/arm-smmu.c >> @@ -610,24 +610,13 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, >> static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie) >> { >> struct arm_smmu_domain *smmu_domain = cookie; >> - struct arm_smmu_device *smmu = smmu_domain->smmu; >> - unsigned long offset = (unsigned long)addr & ~PAGE_MASK; >> >> - >> - /* Ensure new page tables are visible to the hardware walker */ >> - if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) { >> + /* >> + * Ensure new page tables are visible to a coherent hardware walker. >> + * The page table code deals with flushing for the non-coherent case. 
>> + */ >> + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) >> dsb(ishst); >> - } else { >> - /* >> - * If the SMMU can't walk tables in the CPU caches, treat them >> - * like non-coherent DMA since we need to flush the new entries >> - * all the way out to memory. There's no possibility of >> - * recursion here as the SMMU table walker will not be wired >> - * through another SMMU. >> - */ >> - dma_map_page(smmu->dev, virt_to_page(addr), offset, size, >> - DMA_TO_DEVICE); >> - } >> } >> >> static struct iommu_gather_ops arm_smmu_gather_ops = { >> @@ -899,6 +888,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, >> .oas = oas, >> .tlb = &arm_smmu_gather_ops, >> }; >> + if (!(smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)) >> + pgtbl_cfg.iommu_dev = smmu->dev; > > Ideally, I'd like to set the dev unconditionally and kill the flush_pgtable > callback. See my comments on the other patch. I'm not sure setting dev unconditionally would be right - we'd end up doing a load of busy work in the DMA API to decide to do nothing and never get a barrier out of it. We could have a "bool coherent" in the cfg to make the wmb() happen, but the mere absence of dev would do that job just as well (wmb() isn't going to need it to report anything, unlike the DMA API path); either way flush_pgtable can go. Robin. ^ permalink raw reply [flat|nested] 13+ messages in thread
[parent not found: <55B7A439.3000007-5wv7dgnIgG8@public.gmane.org>]
* Re: [PATCH 3/5] iommu/arm-smmu: Clean up DMA API usage [not found] ` <55B7A439.3000007-5wv7dgnIgG8@public.gmane.org> @ 2015-07-28 16:06 ` Will Deacon 0 siblings, 0 replies; 13+ messages in thread From: Will Deacon @ 2015-07-28 16:06 UTC (permalink / raw) To: Robin Murphy Cc: laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw@public.gmane.org, linux-lFZ/pmaqli7XmaaqVzeoHQ@public.gmane.org, iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org On Tue, Jul 28, 2015 at 04:48:09PM +0100, Robin Murphy wrote: > On 28/07/15 11:18, Will Deacon wrote: > > On Mon, Jul 27, 2015 at 07:18:10PM +0100, Robin Murphy wrote: > >> With the correct DMA API calls now integrated into the io-pgtable code, > >> let that handle the flushing of non-coherent page table updates. > >> > >> Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org> > >> --- > >> drivers/iommu/arm-smmu.c | 23 +++++++---------------- > >> 1 file changed, 7 insertions(+), 16 deletions(-) > >> > >> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > >> index 9a59bef..9501668 100644 > >> --- a/drivers/iommu/arm-smmu.c > >> +++ b/drivers/iommu/arm-smmu.c > >> @@ -610,24 +610,13 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, > >> static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie) > >> { > >> struct arm_smmu_domain *smmu_domain = cookie; > >> - struct arm_smmu_device *smmu = smmu_domain->smmu; > >> - unsigned long offset = (unsigned long)addr & ~PAGE_MASK; > >> > >> - > >> - /* Ensure new page tables are visible to the hardware walker */ > >> - if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) { > >> + /* > >> + * Ensure new page tables are visible to a coherent hardware walker. > >> + * The page table code deals with flushing for the non-coherent case. 
> >> + */ > >> + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) > >> dsb(ishst); > >> - } else { > >> - /* > >> - * If the SMMU can't walk tables in the CPU caches, treat them > >> - * like non-coherent DMA since we need to flush the new entries > >> - * all the way out to memory. There's no possibility of > >> - * recursion here as the SMMU table walker will not be wired > >> - * through another SMMU. > >> - */ > >> - dma_map_page(smmu->dev, virt_to_page(addr), offset, size, > >> - DMA_TO_DEVICE); > >> - } > >> } > >> > >> static struct iommu_gather_ops arm_smmu_gather_ops = { > >> @@ -899,6 +888,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > >> .oas = oas, > >> .tlb = &arm_smmu_gather_ops, > >> }; > >> + if (!(smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)) > >> + pgtbl_cfg.iommu_dev = smmu->dev; > > > > Ideally, I'd like to set the dev unconditionally and kill the flush_pgtable > > callback. See my comments on the other patch. > > I'm not sure setting dev unconditionally would be right - we'd end up > doing a load of busy work in the DMA API to decide to do nothing and > never get a barrier out of it. We could have a "bool coherent" in the > cfg to make the wmb() happen, but the mere absence of dev would do that > job just as well (wmb() isn't going to need it to report anything, > unlike the DMA API path); either way flush_pgtable can go. I think we should move to using the DMA API as step (1), then discuss its inefficiencies in step (2) :) No point working around perceived performance issues before we know they're actually hurting us. Will ^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 4/5] iommu/arm-smmu-v3: Clean up DMA API usage [not found] ` <c8cc82fea26044e393ff857014d4146faac5ec1e.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org> 2015-07-27 18:18 ` [PATCH 2/5] iommu/arm-smmu: Sort out coherency Robin Murphy 2015-07-27 18:18 ` [PATCH 3/5] iommu/arm-smmu: Clean up DMA API usage Robin Murphy @ 2015-07-27 18:18 ` Robin Murphy 2015-07-27 18:18 ` [PATCH 5/5] iommu/ipmmu-vmsa: " Robin Murphy 2015-07-28 10:17 ` [PATCH 1/5] iommu/io-pgtable-arm: Allow appropriate DMA API use Will Deacon 4 siblings, 0 replies; 13+ messages in thread From: Robin Murphy @ 2015-07-27 18:18 UTC (permalink / raw) To: will.deacon-5wv7dgnIgG8, laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw Cc: linux-lFZ/pmaqli7XmaaqVzeoHQ, iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA With the correct DMA API calls now integrated into the io-pgtable code, let that handle the flushing of non-coherent page table updates. Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org> --- drivers/iommu/arm-smmu-v3.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 8e9ec81..caa01a6 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1304,23 +1304,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; - struct arm_smmu_device *smmu = smmu_domain->smmu; - unsigned long offset = (unsigned long)addr & ~PAGE_MASK; - if (smmu->features & ARM_SMMU_FEAT_COHERENCY) { + /* The page table code handles flushing in the non-coherent case */ + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENCY) dsb(ishst); - } else { - dma_addr_t dma_addr; - struct device *dev = smmu->dev; - - dma_addr = dma_map_page(dev, virt_to_page(addr), offset, size, - DMA_TO_DEVICE); - - if (dma_mapping_error(dev, dma_addr)) - 
dev_err(dev, "failed to flush pgtable at %p\n", addr); - else - dma_unmap_page(dev, dma_addr, size, DMA_TO_DEVICE); - } } static struct iommu_gather_ops arm_smmu_gather_ops = { @@ -1503,6 +1490,8 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) .oas = oas, .tlb = &arm_smmu_gather_ops, }; + if (!(smmu->features & ARM_SMMU_FEAT_COHERENCY)) + pgtbl_cfg.iommu_dev = smmu->dev; pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) -- 1.9.1 ^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 5/5] iommu/ipmmu-vmsa: Clean up DMA API usage [not found] ` <c8cc82fea26044e393ff857014d4146faac5ec1e.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org> ` (2 preceding siblings ...) 2015-07-27 18:18 ` [PATCH 4/5] iommu/arm-smmu-v3: " Robin Murphy @ 2015-07-27 18:18 ` Robin Murphy 2015-07-28 10:17 ` [PATCH 1/5] iommu/io-pgtable-arm: Allow appropriate DMA API use Will Deacon 4 siblings, 0 replies; 13+ messages in thread From: Robin Murphy @ 2015-07-27 18:18 UTC (permalink / raw) To: will.deacon-5wv7dgnIgG8, laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw Cc: linux-lFZ/pmaqli7XmaaqVzeoHQ, iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA With the correct DMA API calls now integrated into the io-pgtable code, let that handle the flushing of non-coherent page table updates. Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org> --- drivers/iommu/ipmmu-vmsa.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 1a67c53..8cf605f 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -283,24 +283,10 @@ static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, bool leaf, /* The hardware doesn't support selective TLB flush. */ } -static void ipmmu_flush_pgtable(void *ptr, size_t size, void *cookie) -{ - unsigned long offset = (unsigned long)ptr & ~PAGE_MASK; - struct ipmmu_vmsa_domain *domain = cookie; - - /* - * TODO: Add support for coherent walk through CCI with DVM and remove - * cache handling. 
- */ - dma_map_page(domain->mmu->dev, virt_to_page(ptr), offset, size, - DMA_TO_DEVICE); -} - static struct iommu_gather_ops ipmmu_gather_ops = { .tlb_flush_all = ipmmu_tlb_flush_all, .tlb_add_flush = ipmmu_tlb_add_flush, .tlb_sync = ipmmu_tlb_flush_all, - .flush_pgtable = ipmmu_flush_pgtable, }; /* ----------------------------------------------------------------------------- @@ -327,6 +313,11 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) domain->cfg.ias = 32; domain->cfg.oas = 40; domain->cfg.tlb = &ipmmu_gather_ops; + /* + * TODO: Add support for coherent walk through CCI with DVM and remove + * cache handling. For now, delegate it to the io-pgtable code. + */ + domain->cfg.iommu_dev = domain->mmu->dev; domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg, domain); -- 1.9.1 ^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 1/5] iommu/io-pgtable-arm: Allow appropriate DMA API use [not found] ` <c8cc82fea26044e393ff857014d4146faac5ec1e.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org> ` (3 preceding siblings ...) 2015-07-27 18:18 ` [PATCH 5/5] iommu/ipmmu-vmsa: " Robin Murphy @ 2015-07-28 10:17 ` Will Deacon [not found] ` <20150728101722.GF29209-5wv7dgnIgG8@public.gmane.org> 4 siblings, 1 reply; 13+ messages in thread From: Will Deacon @ 2015-07-28 10:17 UTC (permalink / raw) To: Robin Murphy Cc: laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw@public.gmane.org, linux-lFZ/pmaqli7XmaaqVzeoHQ@public.gmane.org, iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org Hi Robin, On Mon, Jul 27, 2015 at 07:18:08PM +0100, Robin Murphy wrote: > Currently, users of the LPAE page table code are (ab)using dma_map_page() > as a means to flush page table updates for non-coherent IOMMUs. Since > from the CPU's point of view, creating IOMMU page tables *is* passing > DMA buffers to a device (the IOMMU's page table walker), there's little > reason not to use the DMA API correctly. > > Allow drivers to opt into appropriate DMA operations for page table > allocation and updates by providing the relevant device, and make the > flush_pgtable() callback optional in case those DMA API operations are > sufficient. The expectation is that an LPAE IOMMU should have a full view > of system memory, so use streaming mappings to avoid unnecessary pressure > on ZONE_DMA, and treat any DMA translation as a warning sign. > > Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org> > --- > > Hi all, > > Since Russell fixing Tegra[1] reminded me, I dug this out from, er, > rather a long time ago[2] and tidied it up. I've tested the SMMUv2 > version with the MMU-401s on Juno (both coherent and non-coherent) > with no visible regressions; I have the same hope for the SMMUv3 and > IPMMU changes since they should be semantically identical. 
At worst > the Renesas driver might need a larger DMA mask setting as per > f1d84548694f, but given that there shouldn't be any highmem involved > I'd think it should be OK as-is. > > Robin. > > [1]:http://article.gmane.org/gmane.linux.ports.tegra/23150 > [2]:http://article.gmane.org/gmane.linux.kernel.iommu/8972 > > drivers/iommu/io-pgtable-arm.c | 107 +++++++++++++++++++++++++++++++---------- > drivers/iommu/io-pgtable.h | 2 + > 2 files changed, 84 insertions(+), 25 deletions(-) > > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c > index 4e46021..b93a60e 100644 > --- a/drivers/iommu/io-pgtable-arm.c > +++ b/drivers/iommu/io-pgtable-arm.c > @@ -200,12 +200,76 @@ typedef u64 arm_lpae_iopte; > > static bool selftest_running = false; > > +static dma_addr_t __arm_lpae_dma(struct device *dev, void *pages) > +{ > + return phys_to_dma(dev, virt_to_phys(pages)); > +} > + > +static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, > + struct io_pgtable_cfg *cfg, void *cookie) > +{ > + void *pages = alloc_pages_exact(size, gfp | __GFP_ZERO); > + struct device *dev = cfg->iommu_dev; > + dma_addr_t dma; > + > + if (!pages) > + return NULL; Missing newline here. > + if (dev) { > + dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE); > + if (dma_mapping_error(dev, dma)) > + goto out_free; > + /* > + * We depend on the IOMMU being able to work with any physical > + * address directly, so if the DMA layer suggests it can't by > + * giving us back some translation, that bodes very badly... > + */ > + if (WARN(dma != __arm_lpae_dma(dev, pages), > + "Cannot accommodate DMA translation for IOMMU page tables\n")) Now that we have a struct device for the iommu, we could use dev_err to make this diagnostic more useful. > + goto out_unmap; > + } Missing newline again... > + if (cfg->tlb->flush_pgtable) Why would you have both a dev and a flush callback? In which cases is the DMA API insufficient? > + cfg->tlb->flush_pgtable(pages, size, cookie); ... 
and here (yeah, pedantry, but consistency keeps this file easier to read). > + return pages; > + > +out_unmap: > + dma_unmap_single(dev, dma, size, DMA_TO_DEVICE); > +out_free: > + free_pages_exact(pages, size); > + return NULL; > +} > + > +static void __arm_lpae_free_pages(void *pages, size_t size, > + struct io_pgtable_cfg *cfg) > +{ > + struct device *dev = cfg->iommu_dev; > + > + if (dev) > + dma_unmap_single(dev, __arm_lpae_dma(dev, pages), > + size, DMA_TO_DEVICE); > + free_pages_exact(pages, size); > +} > + > +static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, > + struct io_pgtable_cfg *cfg, void *cookie) > +{ > + struct device *dev = cfg->iommu_dev; > + > + *ptep = pte; > + > + if (dev) > + dma_sync_single_for_device(dev, __arm_lpae_dma(dev, ptep), > + sizeof(pte), DMA_TO_DEVICE); > + if (cfg->tlb->flush_pgtable) > + cfg->tlb->flush_pgtable(ptep, sizeof(pte), cookie); Could we kill the flush_pgtable callback completely and just stick in a dma_wmb() here? Ideally, we've have something like dma_store_release, which we could use to set the parent page table entry, but that's left as a future exercise ;) > diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h > index 10e32f6..39fe864 100644 > --- a/drivers/iommu/io-pgtable.h > +++ b/drivers/iommu/io-pgtable.h > @@ -41,6 +41,7 @@ struct iommu_gather_ops { > * @ias: Input address (iova) size, in bits. > * @oas: Output address (paddr) size, in bits. > * @tlb: TLB management callbacks for this set of tables. > + * @iommu_dev: The owner of the page table memory (for DMA purposes). > */ > struct io_pgtable_cfg { > #define IO_PGTABLE_QUIRK_ARM_NS (1 << 0) /* Set NS bit in PTEs */ > @@ -49,6 +50,7 @@ struct io_pgtable_cfg { > unsigned int ias; > unsigned int oas; > const struct iommu_gather_ops *tlb; > + struct device *iommu_dev; I think we should also update the comments for iommu_gather_ops once we decide on the fate of flush_pgtable. 
Will ^ permalink raw reply [flat|nested] 13+ messages in thread
[parent not found: <20150728101722.GF29209-5wv7dgnIgG8@public.gmane.org>]
* Re: [PATCH 1/5] iommu/io-pgtable-arm: Allow appropriate DMA API use [not found] ` <20150728101722.GF29209-5wv7dgnIgG8@public.gmane.org> @ 2015-07-28 15:39 ` Robin Murphy [not found] ` <55B7A22B.6010608-5wv7dgnIgG8@public.gmane.org> 0 siblings, 1 reply; 13+ messages in thread From: Robin Murphy @ 2015-07-28 15:39 UTC (permalink / raw) To: Will Deacon Cc: laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw@public.gmane.org, linux-lFZ/pmaqli7XmaaqVzeoHQ@public.gmane.org, iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org On 28/07/15 11:17, Will Deacon wrote: [...] >> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c >> index 4e46021..b93a60e 100644 >> --- a/drivers/iommu/io-pgtable-arm.c >> +++ b/drivers/iommu/io-pgtable-arm.c >> @@ -200,12 +200,76 @@ typedef u64 arm_lpae_iopte; >> >> static bool selftest_running = false; >> >> +static dma_addr_t __arm_lpae_dma(struct device *dev, void *pages) >> +{ >> + return phys_to_dma(dev, virt_to_phys(pages)); >> +} >> + >> +static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, >> + struct io_pgtable_cfg *cfg, void *cookie) >> +{ >> + void *pages = alloc_pages_exact(size, gfp | __GFP_ZERO); >> + struct device *dev = cfg->iommu_dev; >> + dma_addr_t dma; >> + >> + if (!pages) >> + return NULL; > > Missing newline here. Coding style flamewar, go! Or not, since I have neither the energy or inclination, so I'll go with the status quo. (Here we come and here we come and here we go etc.) >> + if (dev) { >> + dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE); >> + if (dma_mapping_error(dev, dma)) >> + goto out_free; >> + /* >> + * We depend on the IOMMU being able to work with any physical >> + * address directly, so if the DMA layer suggests it can't by >> + * giving us back some translation, that bodes very badly... 
>> + */ >> + if (WARN(dma != __arm_lpae_dma(dev, pages), >> + "Cannot accommodate DMA translation for IOMMU page tables\n")) > > Now that we have a struct device for the iommu, we could use dev_err to make > this diagnostic more useful. Good point. >> + goto out_unmap; >> + } > > Missing newline again... > >> + if (cfg->tlb->flush_pgtable) > > Why would you have both a dev and a flush callback? In which cases is the > DMA API insufficient? a) Bisectability given existing users. b) When the device is coherent, the DMA API stuff should be a nop even if a device was provided, therefore some other synchronisation is needed. >> + cfg->tlb->flush_pgtable(pages, size, cookie); > > ... and here (yeah, pedantry, but consistency keeps this file easier to > read). > >> + return pages; >> + >> +out_unmap: >> + dma_unmap_single(dev, dma, size, DMA_TO_DEVICE); >> +out_free: >> + free_pages_exact(pages, size); >> + return NULL; >> +} >> + >> +static void __arm_lpae_free_pages(void *pages, size_t size, >> + struct io_pgtable_cfg *cfg) >> +{ >> + struct device *dev = cfg->iommu_dev; >> + >> + if (dev) >> + dma_unmap_single(dev, __arm_lpae_dma(dev, pages), >> + size, DMA_TO_DEVICE); >> + free_pages_exact(pages, size); >> +} >> + >> +static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, >> + struct io_pgtable_cfg *cfg, void *cookie) >> +{ >> + struct device *dev = cfg->iommu_dev; >> + >> + *ptep = pte; >> + >> + if (dev) >> + dma_sync_single_for_device(dev, __arm_lpae_dma(dev, ptep), >> + sizeof(pte), DMA_TO_DEVICE); >> + if (cfg->tlb->flush_pgtable) >> + cfg->tlb->flush_pgtable(ptep, sizeof(pte), cookie); > > Could we kill the flush_pgtable callback completely and just stick in a > dma_wmb() here? 
Ideally, we'd have something like dma_store_release, > which we could use to set the parent page table entry, but that's left > as a future exercise ;) I couldn't convince myself that there would never be some weird case where an IOMMU driver still needs to do something funky for a coherent device, so I left it in. Given that the existing implementations are either dsb or nothing, however, I agree it may be worth taking out now and only bringing back later if proven necessary. I would think we'd need at least wmb() though, since dma_wmb() only gives us a dmb on arm64; if the PTE is going from invalid to valid (i.e. no TLB involved), we'd have the normal cacheable write of the PTE potentially racing with an MMIO write after we return (the "do DMA with this address" command to the master) and I think we might need a dsb to avoid that - if the PTE write hasn't got far enough for the IOMMU to snoop it, the walk hits the stale invalid entry, aborts the incoming transaction and kills the device. 
^ permalink raw reply [flat|nested] 13+ messages in thread
[parent not found: <55B7A22B.6010608-5wv7dgnIgG8@public.gmane.org>]
* Re: [PATCH 1/5] iommu/io-pgtable-arm: Allow appropriate DMA API use [not found] ` <55B7A22B.6010608-5wv7dgnIgG8@public.gmane.org> @ 2015-07-28 15:52 ` Will Deacon 0 siblings, 0 replies; 13+ messages in thread From: Will Deacon @ 2015-07-28 15:52 UTC (permalink / raw) To: Robin Murphy Cc: laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw@public.gmane.org, linux-lFZ/pmaqli7XmaaqVzeoHQ@public.gmane.org, iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org On Tue, Jul 28, 2015 at 04:39:23PM +0100, Robin Murphy wrote: > On 28/07/15 11:17, Will Deacon wrote: > >> + if (cfg->tlb->flush_pgtable) > > > > Why would you have both a dev and a flush callback? In which cases is the > > DMA API insufficient? > > a) Bisectability given existing users. You could still make it an if (dev) .. else if (flush_pgtable) ... though. Then we could put the wmb() in the if () clause after the DMA-api calls and remove the conditional once everybody has been ported over. > >> +static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, > >> + struct io_pgtable_cfg *cfg, void *cookie) > >> +{ > >> + struct device *dev = cfg->iommu_dev; > >> + > >> + *ptep = pte; > >> + > >> + if (dev) > >> + dma_sync_single_for_device(dev, __arm_lpae_dma(dev, ptep), > >> + sizeof(pte), DMA_TO_DEVICE); > >> + if (cfg->tlb->flush_pgtable) > >> + cfg->tlb->flush_pgtable(ptep, sizeof(pte), cookie); > > > > Could we kill the flush_pgtable callback completely and just stick in a > > dma_wmb() here? Ideally, we've have something like dma_store_release, > > which we could use to set the parent page table entry, but that's left > > as a future exercise ;) > > I couldn't convince myself that there wouldn't never be some weird case > where an IOMMU driver still needs to do something funky for a coherent > device, so I left it in. Given that the existing implementations are > either dsb or nothing, however, I agree it may be worth taking out now > and only bringing back later if proven necessary. 
Yeah, let's keep it as simple as we can and avoid giving people callbacks unless they actually need them. > I would think we'd need at least wmb() though, since dma_wmb() only > gives us a dmb on arm64; if the PTE is going from invalid to valid (i.e. > no TLB involved), we'd have the normal cacheable write of the PTE > potentially racing with an MMIO write after we return (the "do DMA with > this address" command to the master) and I think we might need a dsb to > avoid that - if the PTE write hasn't got far enough for the IOMMU to > snoop it, the walk hits the stale invalid entry, aborts the incoming > transaction and kills the device. Yes, you're right. I was in a CPU-centric mindset and forgot that we can't deal with transient faults when going from invalid -> valid for a device mapping. Will ^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2015-07-28 16:06 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-07-27 18:18 [PATCH 1/5] iommu/io-pgtable-arm: Allow appropriate DMA API use Robin Murphy
[not found] ` <c8cc82fea26044e393ff857014d4146faac5ec1e.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org>
2015-07-27 18:18 ` [PATCH 2/5] iommu/arm-smmu: Sort out coherency Robin Murphy
[not found] ` <4d5880f2131854bc59107ccc917993136e511341.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org>
2015-07-28 9:43 ` Will Deacon
[not found] ` <20150728094349.GE29209-5wv7dgnIgG8@public.gmane.org>
2015-07-28 15:17 ` Robin Murphy
2015-07-27 18:18 ` [PATCH 3/5] iommu/arm-smmu: Clean up DMA API usage Robin Murphy
[not found] ` <921980a38ec7d35610c68e8e94235c55318ba80c.1438019069.git.robin.murphy-5wv7dgnIgG8@public.gmane.org>
2015-07-28 10:18 ` Will Deacon
[not found] ` <20150728101810.GG29209-5wv7dgnIgG8@public.gmane.org>
2015-07-28 15:48 ` Robin Murphy
[not found] ` <55B7A439.3000007-5wv7dgnIgG8@public.gmane.org>
2015-07-28 16:06 ` Will Deacon
2015-07-27 18:18 ` [PATCH 4/5] iommu/arm-smmu-v3: " Robin Murphy
2015-07-27 18:18 ` [PATCH 5/5] iommu/ipmmu-vmsa: " Robin Murphy
2015-07-28 10:17 ` [PATCH 1/5] iommu/io-pgtable-arm: Allow appropriate DMA API use Will Deacon
[not found] ` <20150728101722.GF29209-5wv7dgnIgG8@public.gmane.org>
2015-07-28 15:39 ` Robin Murphy
[not found] ` <55B7A22B.6010608-5wv7dgnIgG8@public.gmane.org>
2015-07-28 15:52 ` Will Deacon
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox