* [RFC PATCH v1 01/15] iommu: Add DOMAIN_ATTR_SPLIT_TABLES
2019-03-01 19:38 ` Jordan Crouse
@ 2019-03-01 19:38 ` Jordan Crouse
-1 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: jean-philippe.brucker-5wv7dgnIgG8,
linux-arm-msm-u79uwXL29TY76Z2rM5mHXA, Joerg Roedel,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
dianders-uWgjrcJnOmJ4cg9Nei1l7Q, hoegsberg-hpIqsD4AKlfQT0dZR+AlfA,
baolu.lu-VuQAYsv1563Yd54FQh9/CA
Add a new domain attribute to enable split pagetable support for devices
that support it.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
include/linux/iommu.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index e90da6b..3f2250b 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -125,6 +125,7 @@ enum iommu_attr {
DOMAIN_ATTR_FSL_PAMUV1,
DOMAIN_ATTR_NESTING, /* two stages of translation */
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+ DOMAIN_ATTR_SPLIT_TABLES,
DOMAIN_ATTR_MAX,
};
--
2.7.4
_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno
^ permalink raw reply related [flat|nested] 46+ messages in thread* [RFC PATCH v1 01/15] iommu: Add DOMAIN_ATTR_SPLIT_TABLES
@ 2019-03-01 19:38 ` Jordan Crouse
0 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno
Cc: jean-philippe.brucker, linux-arm-msm, dianders, hoegsberg,
baolu.lu, iommu, Joerg Roedel, linux-kernel
Add a new domain attribute to enable split pagetable support for devices
that support it.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
include/linux/iommu.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index e90da6b..3f2250b 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -125,6 +125,7 @@ enum iommu_attr {
DOMAIN_ATTR_FSL_PAMUV1,
DOMAIN_ATTR_NESTING, /* two stages of translation */
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+ DOMAIN_ATTR_SPLIT_TABLES,
DOMAIN_ATTR_MAX,
};
--
2.7.4
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [RFC PATCH v1 02/15] iommu/arm-smmu: Add split pagetable support for arm-smmu-v2
2019-03-01 19:38 ` Jordan Crouse
(?)
@ 2019-03-01 19:38 ` Jordan Crouse
-1 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: jean-philippe.brucker-5wv7dgnIgG8,
linux-arm-msm-u79uwXL29TY76Z2rM5mHXA, Joerg Roedel, Will Deacon,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
dianders-uWgjrcJnOmJ4cg9Nei1l7Q, hoegsberg-hpIqsD4AKlfQT0dZR+AlfA,
Robin Murphy, linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
baolu.lu-VuQAYsv1563Yd54FQh9/CA
Add support for a split pagetable (TTBR0/TTBR1) scheme for
arm-smmu-v2. If split pagetables are enabled, create a
pagetable for TTBR1 and set up the sign extension bit so
that all IOVAs with that bit set are mapped and translated
from the TTBR1 pagetable.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/iommu/arm-smmu-regs.h | 18 +++++
drivers/iommu/arm-smmu.c | 149 +++++++++++++++++++++++++++++++++++++----
drivers/iommu/io-pgtable-arm.c | 3 +-
3 files changed, 154 insertions(+), 16 deletions(-)
diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h
index a1226e4..56f9709 100644
--- a/drivers/iommu/arm-smmu-regs.h
+++ b/drivers/iommu/arm-smmu-regs.h
@@ -193,7 +193,25 @@ enum arm_smmu_s2cr_privcfg {
#define RESUME_RETRY (0 << 0)
#define RESUME_TERMINATE (1 << 0)
+#define TTBCR_EPD1 (1 << 23)
+#define TTBCR_T1SZ_SHIFT 16
+#define TTBCR_IRGN1_SHIFT 24
+#define TTBCR_ORGN1_SHIFT 26
+#define TTBCR_RGN_WBWA 1
+#define TTBCR_SH1_SHIFT 28
+#define TTBCR_SH_IS 3
+
+#define TTBCR_TG1_16K (1 << 30)
+#define TTBCR_TG1_4K (2 << 30)
+#define TTBCR_TG1_64K (3 << 30)
+
#define TTBCR2_SEP_SHIFT 15
+#define TTBCR2_SEP_31 (0x0 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_SEP_35 (0x1 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_SEP_39 (0x2 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_SEP_41 (0x3 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_SEP_43 (0x4 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_SEP_47 (0x5 << TTBCR2_SEP_SHIFT)
#define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT)
#define TTBCR2_AS (1 << 4)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index af18a7e..05eb126 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -151,6 +151,7 @@ struct arm_smmu_cb {
u32 tcr[2];
u32 mair[2];
struct arm_smmu_cfg *cfg;
+ u64 split_table_mask;
};
struct arm_smmu_master_cfg {
@@ -208,6 +209,7 @@ struct arm_smmu_device {
unsigned long va_size;
unsigned long ipa_size;
unsigned long pa_size;
+ unsigned long ubs_size;
unsigned long pgsize_bitmap;
u32 num_global_irqs;
@@ -252,13 +254,14 @@ enum arm_smmu_domain_stage {
struct arm_smmu_domain {
struct arm_smmu_device *smmu;
- struct io_pgtable_ops *pgtbl_ops;
+ struct io_pgtable_ops *pgtbl_ops[2];
const struct iommu_gather_ops *tlb_ops;
struct arm_smmu_cfg cfg;
enum arm_smmu_domain_stage stage;
bool non_strict;
struct mutex init_mutex; /* Protects smmu pointer */
spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
+ u32 attributes;
struct iommu_domain domain;
};
@@ -618,6 +621,69 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
return IRQ_HANDLED;
}
+/*
+ * Fill in the cached TTBR1 state of the domain's context bank from
+ * @pgtbl_cfg.
+ *
+ * Updates cb->tcr[0] (EPD1 cleared; SH1, IRGN1, ORGN1, TG1 and T1SZ set),
+ * the SEP field of cb->tcr[1], cb->split_table_mask and cb->ttbr[1]. Only
+ * the in-memory copy is touched here; no register is written.
+ */
+static void arm_smmu_init_ttbr1(struct arm_smmu_domain *smmu_domain,
+		struct io_pgtable_cfg *pgtbl_cfg)
+{
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+	struct arm_smmu_cb *cb = &smmu->cbs[cfg->cbndx];
+	int granule = 1 << __ffs(pgtbl_cfg->pgsize_bitmap);
+	u32 tcr = cb->tcr[0];
+	u32 tcr2 = cb->tcr[1];
+	int sign_bit;
+
+	/* Enable speculative walks through the TTBR1 */
+	tcr &= ~TTBCR_EPD1;
+
+	/* SH1, IRGN1 and ORGN1 fields for the TTBR1 walks */
+	tcr |= (TTBCR_SH_IS << TTBCR_SH1_SHIFT) |
+	       (TTBCR_RGN_WBWA << TTBCR_IRGN1_SHIFT) |
+	       (TTBCR_RGN_WBWA << TTBCR_ORGN1_SHIFT);
+
+	/* TG1 follows the smallest page size in the bitmap */
+	if (granule == SZ_4K)
+		tcr |= TTBCR_TG1_4K;
+	else if (granule == SZ_16K)
+		tcr |= TTBCR_TG1_16K;
+	else if (granule == SZ_64K)
+		tcr |= TTBCR_TG1_64K;
+
+	tcr |= (64ULL - smmu->va_size) << TTBCR_T1SZ_SHIFT;
+
+	/* Clear the existing SEP configuration */
+	tcr2 &= ~TTBCR2_SEP_UPSTREAM;
+
+	/* Set up the sign extend bit */
+	switch (smmu->va_size) {
+	case 32:
+		tcr2 |= TTBCR2_SEP_31;
+		sign_bit = 31;
+		break;
+	case 36:
+		tcr2 |= TTBCR2_SEP_35;
+		sign_bit = 35;
+		break;
+	case 40:
+		tcr2 |= TTBCR2_SEP_39;
+		sign_bit = 39;
+		break;
+	case 42:
+		tcr2 |= TTBCR2_SEP_41;
+		sign_bit = 41;
+		break;
+	case 44:
+		tcr2 |= TTBCR2_SEP_43;
+		sign_bit = 43;
+		break;
+	case 48:
+		/* NOTE(review): SEP_UPSTREAM with bit 48, not SEP_47 - confirm */
+		tcr2 |= TTBCR2_SEP_UPSTREAM;
+		sign_bit = 48;
+		break;
+	default:
+		/* Any other size leaves split_table_mask as it was */
+		sign_bit = -1;
+		break;
+	}
+
+	if (sign_bit >= 0)
+		cb->split_table_mask = 1ULL << sign_bit;
+
+	cb->tcr[0] = tcr;
+	cb->tcr[1] = tcr2;
+	cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] |
+		      ((u64)cfg->asid << TTBRn_ASID_SHIFT);
+}
+
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg)
{
@@ -650,8 +716,12 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
} else {
cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
- cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
- cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
+
+ /*
+ * Set TTBR1 to empty by default - it will get
+ * programmed later if it is enabled
+ */
+ cb->ttbr[1] = (u64)cfg->asid << TTBRn_ASID_SHIFT;
}
} else {
cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
@@ -760,11 +830,13 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
{
int irq, start, ret = 0;
unsigned long ias, oas;
- struct io_pgtable_ops *pgtbl_ops;
+ struct io_pgtable_ops *pgtbl_ops[2];
struct io_pgtable_cfg pgtbl_cfg;
enum io_pgtable_fmt fmt;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+ bool split_tables =
+ (smmu_domain->attributes & (1 << DOMAIN_ATTR_SPLIT_TABLES));
mutex_lock(&smmu_domain->init_mutex);
if (smmu_domain->smmu)
@@ -794,8 +866,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
*
* Note that you can't actually request stage-2 mappings.
*/
- if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
+ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) {
smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
+ /* FIXME: fail instead? */
+ split_tables = false;
+ }
if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
@@ -812,8 +887,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
!IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
(smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
- (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
+ (smmu_domain->stage == ARM_SMMU_DOMAIN_S1)) {
+ /* FIXME: fail instead? */
+ split_tables = false;
cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
+ }
if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
(smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
ARM_SMMU_FEAT_FMT_AARCH64_16K |
@@ -903,8 +981,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
smmu_domain->smmu = smmu;
- pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
- if (!pgtbl_ops) {
+ pgtbl_ops[0] = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
+ if (!pgtbl_ops[0]) {
ret = -ENOMEM;
goto out_clear_smmu;
}
@@ -916,6 +994,22 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
/* Initialise the context bank with our page table cfg */
arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
+
+ pgtbl_ops[1] = NULL;
+
+ if (split_tables) {
+ /* FIXME: I think it is safe to reuse pgtbl_cfg here */
+ pgtbl_ops[1] = alloc_io_pgtable_ops(fmt, &pgtbl_cfg,
+ smmu_domain);
+ if (!pgtbl_ops[1]) {
+ free_io_pgtable_ops(pgtbl_ops[0]);
+ ret = -ENOMEM;
+ goto out_clear_smmu;
+ }
+
+ arm_smmu_init_ttbr1(smmu_domain, &pgtbl_cfg);
+ }
+
arm_smmu_write_context_bank(smmu, cfg->cbndx);
/*
@@ -934,7 +1028,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
mutex_unlock(&smmu_domain->init_mutex);
/* Publish page table ops for map/unmap */
- smmu_domain->pgtbl_ops = pgtbl_ops;
+ smmu_domain->pgtbl_ops[0] = pgtbl_ops[0];
+ smmu_domain->pgtbl_ops[1] = pgtbl_ops[1];
+
return 0;
out_clear_smmu:
@@ -970,7 +1066,9 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
devm_free_irq(smmu->dev, irq, domain);
}
- free_io_pgtable_ops(smmu_domain->pgtbl_ops);
+ free_io_pgtable_ops(smmu_domain->pgtbl_ops[0]);
+ free_io_pgtable_ops(smmu_domain->pgtbl_ops[1]);
+
__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
arm_smmu_rpm_put(smmu);
@@ -1285,10 +1383,23 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
return ret;
}
+/*
+ * Pick the io-pgtable ops that cover @iova.
+ *
+ * Returns pgtbl_ops[1] when the domain has a second pagetable and @iova has
+ * a bit of the context bank's split_table_mask set, and pgtbl_ops[0] in
+ * every other case. The result may be NULL (pgtbl_ops[] are only published
+ * at the end of domain initialisation), so callers must check it.
+ */
+static struct io_pgtable_ops *
+arm_smmu_get_pgtbl_ops(struct iommu_domain *domain, unsigned long iova)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_cb *cb;
+
+	/*
+	 * Without an SMMU there is no context bank to look at, and without
+	 * a second pagetable whatever mask is stored in the context bank
+	 * must not divert the lookup, so only pgtbl_ops[0] can apply.
+	 */
+	if (!smmu || !smmu_domain->pgtbl_ops[1])
+		return smmu_domain->pgtbl_ops[0];
+
+	cb = &smmu->cbs[smmu_domain->cfg.cbndx];
+	if (iova & cb->split_table_mask)
+		return smmu_domain->pgtbl_ops[1];
+
+	return smmu_domain->pgtbl_ops[0];
+}
+
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
- struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+ struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
int ret;
@@ -1305,7 +1416,7 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size)
{
- struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+ struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
size_t ret;
@@ -1349,7 +1460,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
+ struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
struct device *dev = smmu->dev;
void __iomem *cb_base;
u32 tmp;
@@ -1397,7 +1508,7 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+ struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
if (domain->type == IOMMU_DOMAIN_IDENTITY)
return iova;
@@ -1584,6 +1695,11 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
case DOMAIN_ATTR_NESTING:
*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
return 0;
+ case DOMAIN_ATTR_SPLIT_TABLES:
+ *((int *)data) =
+ !!(smmu_domain->attributes &
+ (1 << DOMAIN_ATTR_SPLIT_TABLES));
+ return 0;
default:
return -ENODEV;
}
@@ -1624,6 +1740,11 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
else
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
break;
+ case DOMAIN_ATTR_SPLIT_TABLES:
+ if (*((int *)data))
+ smmu_domain->attributes |=
+ (1 << DOMAIN_ATTR_SPLIT_TABLES);
+ break;
default:
ret = -ENODEV;
}
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 237cacd..dc9fb2e 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -475,8 +475,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
return 0;
- if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
- paddr >= (1ULL << data->iop.cfg.oas)))
+ if (WARN_ON(paddr >= (1ULL << data->iop.cfg.oas)))
return -ERANGE;
prot = arm_lpae_prot_to_pte(data, iommu_prot);
--
2.7.4
_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno
^ permalink raw reply related [flat|nested] 46+ messages in thread* [RFC PATCH v1 02/15] iommu/arm-smmu: Add split pagetable support for arm-smmu-v2
@ 2019-03-01 19:38 ` Jordan Crouse
0 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno
Cc: jean-philippe.brucker, linux-arm-msm, dianders, hoegsberg,
baolu.lu, linux-kernel, iommu, Robin Murphy, Will Deacon,
Joerg Roedel, linux-arm-kernel
Add support for a split pagetable (TTBR0/TTBR1) scheme for
arm-smmu-v2. If split pagetables are enabled, create a
pagetable for TTBR1 and set up the sign extension bit so
that all IOVAs with that bit set are mapped and translated
from the TTBR1 pagetable.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/iommu/arm-smmu-regs.h | 18 +++++
drivers/iommu/arm-smmu.c | 149 +++++++++++++++++++++++++++++++++++++----
drivers/iommu/io-pgtable-arm.c | 3 +-
3 files changed, 154 insertions(+), 16 deletions(-)
diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h
index a1226e4..56f9709 100644
--- a/drivers/iommu/arm-smmu-regs.h
+++ b/drivers/iommu/arm-smmu-regs.h
@@ -193,7 +193,25 @@ enum arm_smmu_s2cr_privcfg {
#define RESUME_RETRY (0 << 0)
#define RESUME_TERMINATE (1 << 0)
+#define TTBCR_EPD1 (1 << 23)
+#define TTBCR_T1SZ_SHIFT 16
+#define TTBCR_IRGN1_SHIFT 24
+#define TTBCR_ORGN1_SHIFT 26
+#define TTBCR_RGN_WBWA 1
+#define TTBCR_SH1_SHIFT 28
+#define TTBCR_SH_IS 3
+
+#define TTBCR_TG1_16K (1 << 30)
+#define TTBCR_TG1_4K (2 << 30)
+#define TTBCR_TG1_64K (3 << 30)
+
#define TTBCR2_SEP_SHIFT 15
+#define TTBCR2_SEP_31 (0x0 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_SEP_35 (0x1 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_SEP_39 (0x2 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_SEP_41 (0x3 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_SEP_43 (0x4 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_SEP_47 (0x5 << TTBCR2_SEP_SHIFT)
#define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT)
#define TTBCR2_AS (1 << 4)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index af18a7e..05eb126 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -151,6 +151,7 @@ struct arm_smmu_cb {
u32 tcr[2];
u32 mair[2];
struct arm_smmu_cfg *cfg;
+ u64 split_table_mask;
};
struct arm_smmu_master_cfg {
@@ -208,6 +209,7 @@ struct arm_smmu_device {
unsigned long va_size;
unsigned long ipa_size;
unsigned long pa_size;
+ unsigned long ubs_size;
unsigned long pgsize_bitmap;
u32 num_global_irqs;
@@ -252,13 +254,14 @@ enum arm_smmu_domain_stage {
struct arm_smmu_domain {
struct arm_smmu_device *smmu;
- struct io_pgtable_ops *pgtbl_ops;
+ struct io_pgtable_ops *pgtbl_ops[2];
const struct iommu_gather_ops *tlb_ops;
struct arm_smmu_cfg cfg;
enum arm_smmu_domain_stage stage;
bool non_strict;
struct mutex init_mutex; /* Protects smmu pointer */
spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
+ u32 attributes;
struct iommu_domain domain;
};
@@ -618,6 +621,69 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
return IRQ_HANDLED;
}
+/*
+ * Fill in the cached TTBR1 state of the domain's context bank from
+ * @pgtbl_cfg.
+ *
+ * Updates cb->tcr[0] (EPD1 cleared; SH1, IRGN1, ORGN1, TG1 and T1SZ set),
+ * the SEP field of cb->tcr[1], cb->split_table_mask and cb->ttbr[1]. Only
+ * the in-memory copy is touched here; no register is written.
+ */
+static void arm_smmu_init_ttbr1(struct arm_smmu_domain *smmu_domain,
+		struct io_pgtable_cfg *pgtbl_cfg)
+{
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+	struct arm_smmu_cb *cb = &smmu->cbs[cfg->cbndx];
+	int granule = 1 << __ffs(pgtbl_cfg->pgsize_bitmap);
+	u32 tcr = cb->tcr[0];
+	u32 tcr2 = cb->tcr[1];
+	int sign_bit;
+
+	/* Enable speculative walks through the TTBR1 */
+	tcr &= ~TTBCR_EPD1;
+
+	/* SH1, IRGN1 and ORGN1 fields for the TTBR1 walks */
+	tcr |= (TTBCR_SH_IS << TTBCR_SH1_SHIFT) |
+	       (TTBCR_RGN_WBWA << TTBCR_IRGN1_SHIFT) |
+	       (TTBCR_RGN_WBWA << TTBCR_ORGN1_SHIFT);
+
+	/* TG1 follows the smallest page size in the bitmap */
+	if (granule == SZ_4K)
+		tcr |= TTBCR_TG1_4K;
+	else if (granule == SZ_16K)
+		tcr |= TTBCR_TG1_16K;
+	else if (granule == SZ_64K)
+		tcr |= TTBCR_TG1_64K;
+
+	tcr |= (64ULL - smmu->va_size) << TTBCR_T1SZ_SHIFT;
+
+	/* Clear the existing SEP configuration */
+	tcr2 &= ~TTBCR2_SEP_UPSTREAM;
+
+	/* Set up the sign extend bit */
+	switch (smmu->va_size) {
+	case 32:
+		tcr2 |= TTBCR2_SEP_31;
+		sign_bit = 31;
+		break;
+	case 36:
+		tcr2 |= TTBCR2_SEP_35;
+		sign_bit = 35;
+		break;
+	case 40:
+		tcr2 |= TTBCR2_SEP_39;
+		sign_bit = 39;
+		break;
+	case 42:
+		tcr2 |= TTBCR2_SEP_41;
+		sign_bit = 41;
+		break;
+	case 44:
+		tcr2 |= TTBCR2_SEP_43;
+		sign_bit = 43;
+		break;
+	case 48:
+		/* NOTE(review): SEP_UPSTREAM with bit 48, not SEP_47 - confirm */
+		tcr2 |= TTBCR2_SEP_UPSTREAM;
+		sign_bit = 48;
+		break;
+	default:
+		/* Any other size leaves split_table_mask as it was */
+		sign_bit = -1;
+		break;
+	}
+
+	if (sign_bit >= 0)
+		cb->split_table_mask = 1ULL << sign_bit;
+
+	cb->tcr[0] = tcr;
+	cb->tcr[1] = tcr2;
+	cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] |
+		      ((u64)cfg->asid << TTBRn_ASID_SHIFT);
+}
+
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg)
{
@@ -650,8 +716,12 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
} else {
cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
- cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
- cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
+
+ /*
+ * Set TTBR1 to empty by default - it will get
+ * programmed later if it is enabled
+ */
+ cb->ttbr[1] = (u64)cfg->asid << TTBRn_ASID_SHIFT;
}
} else {
cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
@@ -760,11 +830,13 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
{
int irq, start, ret = 0;
unsigned long ias, oas;
- struct io_pgtable_ops *pgtbl_ops;
+ struct io_pgtable_ops *pgtbl_ops[2];
struct io_pgtable_cfg pgtbl_cfg;
enum io_pgtable_fmt fmt;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+ bool split_tables =
+ (smmu_domain->attributes & (1 << DOMAIN_ATTR_SPLIT_TABLES));
mutex_lock(&smmu_domain->init_mutex);
if (smmu_domain->smmu)
@@ -794,8 +866,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
*
* Note that you can't actually request stage-2 mappings.
*/
- if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
+ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) {
smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
+ /* FIXME: fail instead? */
+ split_tables = false;
+ }
if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
@@ -812,8 +887,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
!IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
(smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
- (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
+ (smmu_domain->stage == ARM_SMMU_DOMAIN_S1)) {
+ /* FIXME: fail instead? */
+ split_tables = false;
cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
+ }
if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
(smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
ARM_SMMU_FEAT_FMT_AARCH64_16K |
@@ -903,8 +981,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
smmu_domain->smmu = smmu;
- pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
- if (!pgtbl_ops) {
+ pgtbl_ops[0] = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
+ if (!pgtbl_ops[0]) {
ret = -ENOMEM;
goto out_clear_smmu;
}
@@ -916,6 +994,22 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
/* Initialise the context bank with our page table cfg */
arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
+
+ pgtbl_ops[1] = NULL;
+
+ if (split_tables) {
+ /* FIXME: I think it is safe to reuse pgtbl_cfg here */
+ pgtbl_ops[1] = alloc_io_pgtable_ops(fmt, &pgtbl_cfg,
+ smmu_domain);
+ if (!pgtbl_ops[1]) {
+ free_io_pgtable_ops(pgtbl_ops[0]);
+ ret = -ENOMEM;
+ goto out_clear_smmu;
+ }
+
+ arm_smmu_init_ttbr1(smmu_domain, &pgtbl_cfg);
+ }
+
arm_smmu_write_context_bank(smmu, cfg->cbndx);
/*
@@ -934,7 +1028,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
mutex_unlock(&smmu_domain->init_mutex);
/* Publish page table ops for map/unmap */
- smmu_domain->pgtbl_ops = pgtbl_ops;
+ smmu_domain->pgtbl_ops[0] = pgtbl_ops[0];
+ smmu_domain->pgtbl_ops[1] = pgtbl_ops[1];
+
return 0;
out_clear_smmu:
@@ -970,7 +1066,9 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
devm_free_irq(smmu->dev, irq, domain);
}
- free_io_pgtable_ops(smmu_domain->pgtbl_ops);
+ free_io_pgtable_ops(smmu_domain->pgtbl_ops[0]);
+ free_io_pgtable_ops(smmu_domain->pgtbl_ops[1]);
+
__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
arm_smmu_rpm_put(smmu);
@@ -1285,10 +1383,23 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
return ret;
}
+/*
+ * Pick the io-pgtable ops that cover @iova.
+ *
+ * Returns pgtbl_ops[1] when the domain has a second pagetable and @iova has
+ * a bit of the context bank's split_table_mask set, and pgtbl_ops[0] in
+ * every other case. The result may be NULL (pgtbl_ops[] are only published
+ * at the end of domain initialisation), so callers must check it.
+ */
+static struct io_pgtable_ops *
+arm_smmu_get_pgtbl_ops(struct iommu_domain *domain, unsigned long iova)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_cb *cb;
+
+	/*
+	 * Without an SMMU there is no context bank to look at, and without
+	 * a second pagetable whatever mask is stored in the context bank
+	 * must not divert the lookup, so only pgtbl_ops[0] can apply.
+	 */
+	if (!smmu || !smmu_domain->pgtbl_ops[1])
+		return smmu_domain->pgtbl_ops[0];
+
+	cb = &smmu->cbs[smmu_domain->cfg.cbndx];
+	if (iova & cb->split_table_mask)
+		return smmu_domain->pgtbl_ops[1];
+
+	return smmu_domain->pgtbl_ops[0];
+}
+
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
- struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+ struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
int ret;
@@ -1305,7 +1416,7 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size)
{
- struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+ struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
size_t ret;
@@ -1349,7 +1460,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
+ struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
struct device *dev = smmu->dev;
void __iomem *cb_base;
u32 tmp;
@@ -1397,7 +1508,7 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+ struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
if (domain->type == IOMMU_DOMAIN_IDENTITY)
return iova;
@@ -1584,6 +1695,11 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
case DOMAIN_ATTR_NESTING:
*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
return 0;
+ case DOMAIN_ATTR_SPLIT_TABLES:
+ *((int *)data) =
+ !!(smmu_domain->attributes &
+ (1 << DOMAIN_ATTR_SPLIT_TABLES));
+ return 0;
default:
return -ENODEV;
}
@@ -1624,6 +1740,11 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
else
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
break;
+ case DOMAIN_ATTR_SPLIT_TABLES:
+ if (*((int *)data))
+ smmu_domain->attributes |=
+ (1 << DOMAIN_ATTR_SPLIT_TABLES);
+ break;
default:
ret = -ENODEV;
}
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 237cacd..dc9fb2e 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -475,8 +475,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
return 0;
- if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
- paddr >= (1ULL << data->iop.cfg.oas)))
+ if (WARN_ON(paddr >= (1ULL << data->iop.cfg.oas)))
return -ERANGE;
prot = arm_lpae_prot_to_pte(data, iommu_prot);
--
2.7.4
^ permalink raw reply related [flat|nested] 46+ messages in thread* [RFC PATCH v1 02/15] iommu/arm-smmu: Add split pagetable support for arm-smmu-v2
@ 2019-03-01 19:38 ` Jordan Crouse
0 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno
Cc: jean-philippe.brucker, linux-arm-msm, Joerg Roedel, Will Deacon,
linux-kernel, iommu, dianders, hoegsberg, Robin Murphy,
linux-arm-kernel, baolu.lu
Add support for a split pagetable (TTBR0/TTBR1) scheme for
arm-smmu-v2. If split pagetables are enabled, create a
pagetable for TTBR1 and set up the sign extension bit so
that all IOVAs with that bit set are mapped and translated
from the TTBR1 pagetable.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/iommu/arm-smmu-regs.h | 18 +++++
drivers/iommu/arm-smmu.c | 149 +++++++++++++++++++++++++++++++++++++----
drivers/iommu/io-pgtable-arm.c | 3 +-
3 files changed, 154 insertions(+), 16 deletions(-)
diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h
index a1226e4..56f9709 100644
--- a/drivers/iommu/arm-smmu-regs.h
+++ b/drivers/iommu/arm-smmu-regs.h
@@ -193,7 +193,25 @@ enum arm_smmu_s2cr_privcfg {
#define RESUME_RETRY (0 << 0)
#define RESUME_TERMINATE (1 << 0)
+#define TTBCR_EPD1 (1 << 23)
+#define TTBCR_T1SZ_SHIFT 16
+#define TTBCR_IRGN1_SHIFT 24
+#define TTBCR_ORGN1_SHIFT 26
+#define TTBCR_RGN_WBWA 1
+#define TTBCR_SH1_SHIFT 28
+#define TTBCR_SH_IS 3
+
+#define TTBCR_TG1_16K (1 << 30)
+#define TTBCR_TG1_4K (2 << 30)
+#define TTBCR_TG1_64K (3 << 30)
+
#define TTBCR2_SEP_SHIFT 15
+#define TTBCR2_SEP_31 (0x0 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_SEP_35 (0x1 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_SEP_39 (0x2 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_SEP_41 (0x3 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_SEP_43 (0x4 << TTBCR2_SEP_SHIFT)
+#define TTBCR2_SEP_47 (0x5 << TTBCR2_SEP_SHIFT)
#define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT)
#define TTBCR2_AS (1 << 4)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index af18a7e..05eb126 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -151,6 +151,7 @@ struct arm_smmu_cb {
u32 tcr[2];
u32 mair[2];
struct arm_smmu_cfg *cfg;
+ u64 split_table_mask;
};
struct arm_smmu_master_cfg {
@@ -208,6 +209,7 @@ struct arm_smmu_device {
unsigned long va_size;
unsigned long ipa_size;
unsigned long pa_size;
+ unsigned long ubs_size;
unsigned long pgsize_bitmap;
u32 num_global_irqs;
@@ -252,13 +254,14 @@ enum arm_smmu_domain_stage {
struct arm_smmu_domain {
struct arm_smmu_device *smmu;
- struct io_pgtable_ops *pgtbl_ops;
+ struct io_pgtable_ops *pgtbl_ops[2];
const struct iommu_gather_ops *tlb_ops;
struct arm_smmu_cfg cfg;
enum arm_smmu_domain_stage stage;
bool non_strict;
struct mutex init_mutex; /* Protects smmu pointer */
spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
+ u32 attributes;
struct iommu_domain domain;
};
@@ -618,6 +621,69 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
return IRQ_HANDLED;
}
+/*
+ * Fill in the cached TTBR1 state of the domain's context bank from
+ * @pgtbl_cfg.
+ *
+ * Updates cb->tcr[0] (EPD1 cleared; SH1, IRGN1, ORGN1, TG1 and T1SZ set),
+ * the SEP field of cb->tcr[1], cb->split_table_mask and cb->ttbr[1]. Only
+ * the in-memory copy is touched here; no register is written.
+ */
+static void arm_smmu_init_ttbr1(struct arm_smmu_domain *smmu_domain,
+		struct io_pgtable_cfg *pgtbl_cfg)
+{
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+	struct arm_smmu_cb *cb = &smmu->cbs[cfg->cbndx];
+	int granule = 1 << __ffs(pgtbl_cfg->pgsize_bitmap);
+	u32 tcr = cb->tcr[0];
+	u32 tcr2 = cb->tcr[1];
+	int sign_bit;
+
+	/* Enable speculative walks through the TTBR1 */
+	tcr &= ~TTBCR_EPD1;
+
+	/* SH1, IRGN1 and ORGN1 fields for the TTBR1 walks */
+	tcr |= (TTBCR_SH_IS << TTBCR_SH1_SHIFT) |
+	       (TTBCR_RGN_WBWA << TTBCR_IRGN1_SHIFT) |
+	       (TTBCR_RGN_WBWA << TTBCR_ORGN1_SHIFT);
+
+	/* TG1 follows the smallest page size in the bitmap */
+	if (granule == SZ_4K)
+		tcr |= TTBCR_TG1_4K;
+	else if (granule == SZ_16K)
+		tcr |= TTBCR_TG1_16K;
+	else if (granule == SZ_64K)
+		tcr |= TTBCR_TG1_64K;
+
+	tcr |= (64ULL - smmu->va_size) << TTBCR_T1SZ_SHIFT;
+
+	/* Clear the existing SEP configuration */
+	tcr2 &= ~TTBCR2_SEP_UPSTREAM;
+
+	/* Set up the sign extend bit */
+	switch (smmu->va_size) {
+	case 32:
+		tcr2 |= TTBCR2_SEP_31;
+		sign_bit = 31;
+		break;
+	case 36:
+		tcr2 |= TTBCR2_SEP_35;
+		sign_bit = 35;
+		break;
+	case 40:
+		tcr2 |= TTBCR2_SEP_39;
+		sign_bit = 39;
+		break;
+	case 42:
+		tcr2 |= TTBCR2_SEP_41;
+		sign_bit = 41;
+		break;
+	case 44:
+		tcr2 |= TTBCR2_SEP_43;
+		sign_bit = 43;
+		break;
+	case 48:
+		/* NOTE(review): SEP_UPSTREAM with bit 48, not SEP_47 - confirm */
+		tcr2 |= TTBCR2_SEP_UPSTREAM;
+		sign_bit = 48;
+		break;
+	default:
+		/* Any other size leaves split_table_mask as it was */
+		sign_bit = -1;
+		break;
+	}
+
+	if (sign_bit >= 0)
+		cb->split_table_mask = 1ULL << sign_bit;
+
+	cb->tcr[0] = tcr;
+	cb->tcr[1] = tcr2;
+	cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] |
+		      ((u64)cfg->asid << TTBRn_ASID_SHIFT);
+}
+
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg)
{
@@ -650,8 +716,12 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
} else {
cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
- cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
- cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
+
+ /*
+ * Set TTBR1 to empty by default - it will get
+ * programmed later if it is enabled
+ */
+ cb->ttbr[1] = (u64)cfg->asid << TTBRn_ASID_SHIFT;
}
} else {
cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
@@ -760,11 +830,13 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
{
int irq, start, ret = 0;
unsigned long ias, oas;
- struct io_pgtable_ops *pgtbl_ops;
+ struct io_pgtable_ops *pgtbl_ops[2];
struct io_pgtable_cfg pgtbl_cfg;
enum io_pgtable_fmt fmt;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+ bool split_tables =
+ (smmu_domain->attributes & (1 << DOMAIN_ATTR_SPLIT_TABLES));
mutex_lock(&smmu_domain->init_mutex);
if (smmu_domain->smmu)
@@ -794,8 +866,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
*
* Note that you can't actually request stage-2 mappings.
*/
- if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
+ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) {
smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
+ /* FIXME: fail instead? */
+ split_tables = false;
+ }
if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
@@ -812,8 +887,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
!IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
(smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
- (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
+ (smmu_domain->stage == ARM_SMMU_DOMAIN_S1)) {
+ /* FIXME: fail instead? */
+ split_tables = false;
cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
+ }
if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
(smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
ARM_SMMU_FEAT_FMT_AARCH64_16K |
@@ -903,8 +981,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
smmu_domain->smmu = smmu;
- pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
- if (!pgtbl_ops) {
+ pgtbl_ops[0] = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
+ if (!pgtbl_ops[0]) {
ret = -ENOMEM;
goto out_clear_smmu;
}
@@ -916,6 +994,22 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
/* Initialise the context bank with our page table cfg */
arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
+
+ pgtbl_ops[1] = NULL;
+
+ if (split_tables) {
+ /* FIXME: I think it is safe to reuse pgtbl_cfg here */
+ pgtbl_ops[1] = alloc_io_pgtable_ops(fmt, &pgtbl_cfg,
+ smmu_domain);
+ if (!pgtbl_ops[1]) {
+ free_io_pgtable_ops(pgtbl_ops[0]);
+ ret = -ENOMEM;
+ goto out_clear_smmu;
+ }
+
+ arm_smmu_init_ttbr1(smmu_domain, &pgtbl_cfg);
+ }
+
arm_smmu_write_context_bank(smmu, cfg->cbndx);
/*
@@ -934,7 +1028,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
mutex_unlock(&smmu_domain->init_mutex);
/* Publish page table ops for map/unmap */
- smmu_domain->pgtbl_ops = pgtbl_ops;
+ smmu_domain->pgtbl_ops[0] = pgtbl_ops[0];
+ smmu_domain->pgtbl_ops[1] = pgtbl_ops[1];
+
return 0;
out_clear_smmu:
@@ -970,7 +1066,9 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
devm_free_irq(smmu->dev, irq, domain);
}
- free_io_pgtable_ops(smmu_domain->pgtbl_ops);
+ free_io_pgtable_ops(smmu_domain->pgtbl_ops[0]);
+ free_io_pgtable_ops(smmu_domain->pgtbl_ops[1]);
+
__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
arm_smmu_rpm_put(smmu);
@@ -1285,10 +1383,23 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
return ret;
}
+static struct io_pgtable_ops *
+arm_smmu_get_pgtbl_ops(struct iommu_domain *domain, unsigned long iova)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+ struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
+
+ if (iova & cb->split_table_mask)
+ return smmu_domain->pgtbl_ops[1];
+
+ return smmu_domain->pgtbl_ops[0];
+}
+
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
- struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+ struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
int ret;
@@ -1305,7 +1416,7 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size)
{
- struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+ struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
size_t ret;
@@ -1349,7 +1460,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
+ struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
struct device *dev = smmu->dev;
void __iomem *cb_base;
u32 tmp;
@@ -1397,7 +1508,7 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+ struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
if (domain->type == IOMMU_DOMAIN_IDENTITY)
return iova;
@@ -1584,6 +1695,11 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
case DOMAIN_ATTR_NESTING:
*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
return 0;
+ case DOMAIN_ATTR_SPLIT_TABLES:
+ *((int *)data) =
+ !!(smmu_domain->attributes &
+ (1 << DOMAIN_ATTR_SPLIT_TABLES));
+ return 0;
default:
return -ENODEV;
}
@@ -1624,6 +1740,11 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
else
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
break;
+ case DOMAIN_ATTR_SPLIT_TABLES:
+ if (*((int *)data))
+ smmu_domain->attributes |=
+ (1 << DOMAIN_ATTR_SPLIT_TABLES);
+ break;
default:
ret = -ENODEV;
}
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 237cacd..dc9fb2e 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -475,8 +475,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
return 0;
- if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
- paddr >= (1ULL << data->iop.cfg.oas)))
+ if (WARN_ON(paddr >= (1ULL << data->iop.cfg.oas)))
return -ERANGE;
prot = arm_lpae_prot_to_pte(data, iommu_prot);
--
2.7.4
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 46+ messages in thread
[parent not found: <1551469117-3404-3-git-send-email-jcrouse-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>]
* Re: [RFC PATCH v1 02/15] iommu/arm-smmu: Add split pagetable support for arm-smmu-v2
2019-03-01 19:38 ` Jordan Crouse
(?)
@ 2019-03-01 20:25 ` Rob Clark
-1 siblings, 0 replies; 46+ messages in thread
From: Rob Clark @ 2019-03-01 20:25 UTC (permalink / raw)
To: Jordan Crouse
Cc: Jean-Philippe Brucker, linux-arm-msm, Joerg Roedel, Will Deacon,
Linux Kernel Mailing List,
list-Y9sIeH5OGRo@public.gmane.org:IOMMU DRIVERS <iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org>, Joerg Roedel <joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>, ,
dianders-uWgjrcJnOmJ4cg9Nei1l7Q, Kristian H. Kristensen,
baolu.lu-VuQAYsv1563Yd54FQh9/CA, freedreno,
moderated list:ARM/FREESCALE IMX / MXC ARM ARCHITECTURE,
Robin Murphy
On Fri, Mar 1, 2019 at 2:38 PM Jordan Crouse <jcrouse@codeaurora.org> wrote:
>
> Add support for a split pagetable (TTBR0/TTBR1) scheme for
> arm-smmu-v2. If split pagetables are enabled, create a
> pagetable for TTBR1 and set up the sign extension bit so
> that all IOVAs with that bit set are mapped and translated
> from the TTBR1 pagetable.
>
> Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
> ---
>
> drivers/iommu/arm-smmu-regs.h | 18 +++++
> drivers/iommu/arm-smmu.c | 149 +++++++++++++++++++++++++++++++++++++----
> drivers/iommu/io-pgtable-arm.c | 3 +-
> 3 files changed, 154 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h
> index a1226e4..56f9709 100644
> --- a/drivers/iommu/arm-smmu-regs.h
> +++ b/drivers/iommu/arm-smmu-regs.h
> @@ -193,7 +193,25 @@ enum arm_smmu_s2cr_privcfg {
> #define RESUME_RETRY (0 << 0)
> #define RESUME_TERMINATE (1 << 0)
>
> +#define TTBCR_EPD1 (1 << 23)
> +#define TTBCR_T1SZ_SHIFT 16
> +#define TTBCR_IRGN1_SHIFT 24
> +#define TTBCR_ORGN1_SHIFT 26
> +#define TTBCR_RGN_WBWA 1
> +#define TTBCR_SH1_SHIFT 28
> +#define TTBCR_SH_IS 3
> +
> +#define TTBCR_TG1_16K (1 << 30)
> +#define TTBCR_TG1_4K (2 << 30)
> +#define TTBCR_TG1_64K (3 << 30)
> +
> #define TTBCR2_SEP_SHIFT 15
> +#define TTBCR2_SEP_31 (0x0 << TTBCR2_SEP_SHIFT)
> +#define TTBCR2_SEP_35 (0x1 << TTBCR2_SEP_SHIFT)
> +#define TTBCR2_SEP_39 (0x2 << TTBCR2_SEP_SHIFT)
> +#define TTBCR2_SEP_41 (0x3 << TTBCR2_SEP_SHIFT)
> +#define TTBCR2_SEP_43 (0x4 << TTBCR2_SEP_SHIFT)
> +#define TTBCR2_SEP_47 (0x5 << TTBCR2_SEP_SHIFT)
> #define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT)
> #define TTBCR2_AS (1 << 4)
>
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index af18a7e..05eb126 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -151,6 +151,7 @@ struct arm_smmu_cb {
> u32 tcr[2];
> u32 mair[2];
> struct arm_smmu_cfg *cfg;
> + u64 split_table_mask;
> };
>
> struct arm_smmu_master_cfg {
> @@ -208,6 +209,7 @@ struct arm_smmu_device {
> unsigned long va_size;
> unsigned long ipa_size;
> unsigned long pa_size;
> + unsigned long ubs_size;
> unsigned long pgsize_bitmap;
>
> u32 num_global_irqs;
> @@ -252,13 +254,14 @@ enum arm_smmu_domain_stage {
>
> struct arm_smmu_domain {
> struct arm_smmu_device *smmu;
> - struct io_pgtable_ops *pgtbl_ops;
> + struct io_pgtable_ops *pgtbl_ops[2];
> const struct iommu_gather_ops *tlb_ops;
> struct arm_smmu_cfg cfg;
> enum arm_smmu_domain_stage stage;
> bool non_strict;
> struct mutex init_mutex; /* Protects smmu pointer */
> spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
> + u32 attributes;
> struct iommu_domain domain;
> };
>
> @@ -618,6 +621,69 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
> return IRQ_HANDLED;
> }
>
> +static void arm_smmu_init_ttbr1(struct arm_smmu_domain *smmu_domain,
> + struct io_pgtable_cfg *pgtbl_cfg)
> +{
> + struct arm_smmu_device *smmu = smmu_domain->smmu;
> + struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> + struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
> + int pgsize = 1 << __ffs(pgtbl_cfg->pgsize_bitmap);
> +
> + /* Enable speculative walks through the TTBR1 */
> + cb->tcr[0] &= ~TTBCR_EPD1;
> +
> + cb->tcr[0] |= TTBCR_SH_IS << TTBCR_SH1_SHIFT;
> + cb->tcr[0] |= TTBCR_RGN_WBWA << TTBCR_IRGN1_SHIFT;
> + cb->tcr[0] |= TTBCR_RGN_WBWA << TTBCR_ORGN1_SHIFT;
> +
> + switch (pgsize) {
> + case SZ_4K:
> + cb->tcr[0] |= TTBCR_TG1_4K;
> + break;
> + case SZ_16K:
> + cb->tcr[0] |= TTBCR_TG1_16K;
> + break;
> + case SZ_64K:
> + cb->tcr[0] |= TTBCR_TG1_64K;
> + break;
> + }
> +
> + cb->tcr[0] |= (64ULL - smmu->va_size) << TTBCR_T1SZ_SHIFT;
> +
> + /* Clear the existing SEP configuration */
> + cb->tcr[1] &= ~TTBCR2_SEP_UPSTREAM;
> +
> + /* Set up the sign extend bit */
> + switch (smmu->va_size) {
> + case 32:
> + cb->tcr[1] |= TTBCR2_SEP_31;
> + cb->split_table_mask = (1ULL << 31);
> + break;
> + case 36:
> + cb->tcr[1] |= TTBCR2_SEP_35;
> + cb->split_table_mask = (1ULL << 35);
> + break;
> + case 40:
> + cb->tcr[1] |= TTBCR2_SEP_39;
> + cb->split_table_mask = (1ULL << 39);
> + break;
> + case 42:
> + cb->tcr[1] |= TTBCR2_SEP_41;
> + cb->split_table_mask = (1ULL << 41);
> + break;
> + case 44:
> + cb->tcr[1] |= TTBCR2_SEP_43;
> + cb->split_table_mask = (1ULL << 43);
> + break;
> + case 48:
> + cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
> + cb->split_table_mask = (1ULL << 48);
> + }
> +
> + cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
> + cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
> +}
> +
> static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
> struct io_pgtable_cfg *pgtbl_cfg)
> {
> @@ -650,8 +716,12 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
> } else {
> cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
> cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
> - cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
> - cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
> +
> + /*
> + * Set TTBR1 to empty by default - it will get
> + * programmed later if it is enabled
> + */
> + cb->ttbr[1] = (u64)cfg->asid << TTBRn_ASID_SHIFT;
> }
> } else {
> cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
> @@ -760,11 +830,13 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> {
> int irq, start, ret = 0;
> unsigned long ias, oas;
> - struct io_pgtable_ops *pgtbl_ops;
> + struct io_pgtable_ops *pgtbl_ops[2];
> struct io_pgtable_cfg pgtbl_cfg;
> enum io_pgtable_fmt fmt;
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> + bool split_tables =
> + (smmu_domain->attributes & (1 << DOMAIN_ATTR_SPLIT_TABLES));
Maybe use BIT(DOMAIN_ATTR_SPLIT_TABLES) here instead of the open-coded shift?
>
> mutex_lock(&smmu_domain->init_mutex);
> if (smmu_domain->smmu)
> @@ -794,8 +866,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> *
> * Note that you can't actually request stage-2 mappings.
> */
> - if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
> + if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) {
> smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
> + /* FIXME: fail instead? */
> + split_tables = false;
Yeah, I think we want to return an error somewhere if this is not supported.
I think we want to fall back to not using per-process pagetables if
this fails.
BR,
-R
> + }
> if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
> smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
>
> @@ -812,8 +887,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
> !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
> (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
> - (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
> + (smmu_domain->stage == ARM_SMMU_DOMAIN_S1)) {
> + /* FIXME: fail instead? */
> + split_tables = false;
> cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
> + }
> if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
> (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
> ARM_SMMU_FEAT_FMT_AARCH64_16K |
> @@ -903,8 +981,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
>
> smmu_domain->smmu = smmu;
> - pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
> - if (!pgtbl_ops) {
> + pgtbl_ops[0] = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
> + if (!pgtbl_ops[0]) {
> ret = -ENOMEM;
> goto out_clear_smmu;
> }
> @@ -916,6 +994,22 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
>
> /* Initialise the context bank with our page table cfg */
> arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
> +
> + pgtbl_ops[1] = NULL;
> +
> + if (split_tables) {
> + /* FIXME: I think it is safe to reuse pgtbl_cfg here */
> + pgtbl_ops[1] = alloc_io_pgtable_ops(fmt, &pgtbl_cfg,
> + smmu_domain);
> + if (!pgtbl_ops[1]) {
> + free_io_pgtable_ops(pgtbl_ops[0]);
> + ret = -ENOMEM;
> + goto out_clear_smmu;
> + }
> +
> + arm_smmu_init_ttbr1(smmu_domain, &pgtbl_cfg);
> + }
> +
> arm_smmu_write_context_bank(smmu, cfg->cbndx);
>
> /*
> @@ -934,7 +1028,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> mutex_unlock(&smmu_domain->init_mutex);
>
> /* Publish page table ops for map/unmap */
> - smmu_domain->pgtbl_ops = pgtbl_ops;
> + smmu_domain->pgtbl_ops[0] = pgtbl_ops[0];
> + smmu_domain->pgtbl_ops[1] = pgtbl_ops[1];
> +
> return 0;
>
> out_clear_smmu:
> @@ -970,7 +1066,9 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
> devm_free_irq(smmu->dev, irq, domain);
> }
>
> - free_io_pgtable_ops(smmu_domain->pgtbl_ops);
> + free_io_pgtable_ops(smmu_domain->pgtbl_ops[0]);
> + free_io_pgtable_ops(smmu_domain->pgtbl_ops[1]);
> +
> __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
>
> arm_smmu_rpm_put(smmu);
> @@ -1285,10 +1383,23 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> return ret;
> }
>
> +static struct io_pgtable_ops *
> +arm_smmu_get_pgtbl_ops(struct iommu_domain *domain, unsigned long iova)
> +{
> + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> + struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> + struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
> +
> + if (iova & cb->split_table_mask)
> + return smmu_domain->pgtbl_ops[1];
> +
> + return smmu_domain->pgtbl_ops[0];
> +}
> +
> static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
> phys_addr_t paddr, size_t size, int prot)
> {
> - struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
> + struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
> struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
> int ret;
>
> @@ -1305,7 +1416,7 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
> static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
> size_t size)
> {
> - struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
> + struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
> struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
> size_t ret;
>
> @@ -1349,7 +1460,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> struct arm_smmu_device *smmu = smmu_domain->smmu;
> struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> - struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
> + struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
> struct device *dev = smmu->dev;
> void __iomem *cb_base;
> u32 tmp;
> @@ -1397,7 +1508,7 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
> dma_addr_t iova)
> {
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> - struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> + struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
>
> if (domain->type == IOMMU_DOMAIN_IDENTITY)
> return iova;
> @@ -1584,6 +1695,11 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
> case DOMAIN_ATTR_NESTING:
> *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
> return 0;
> + case DOMAIN_ATTR_SPLIT_TABLES:
> + *((int *)data) =
> + !!(smmu_domain->attributes &
> + (1 << DOMAIN_ATTR_SPLIT_TABLES));
> + return 0;
> default:
> return -ENODEV;
> }
> @@ -1624,6 +1740,11 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
> else
> smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
> break;
> + case DOMAIN_ATTR_SPLIT_TABLES:
> + if (*((int *)data))
> + smmu_domain->attributes |=
> + (1 << DOMAIN_ATTR_SPLIT_TABLES);
> + break;
> default:
> ret = -ENODEV;
> }
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index 237cacd..dc9fb2e 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -475,8 +475,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
> if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
> return 0;
>
> - if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
> - paddr >= (1ULL << data->iop.cfg.oas)))
> + if (WARN_ON(paddr >= (1ULL << data->iop.cfg.oas)))
> return -ERANGE;
>
> prot = arm_lpae_prot_to_pte(data, iommu_prot);
> --
> 2.7.4
>
> _______________________________________________
> Freedreno mailing list
> Freedreno@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/freedreno
_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Freedreno] [RFC PATCH v1 02/15] iommu/arm-smmu: Add split pagetable support for arm-smmu-v2
@ 2019-03-01 20:25 ` Rob Clark
0 siblings, 0 replies; 46+ messages in thread
From: Rob Clark @ 2019-03-01 20:25 UTC (permalink / raw)
To: Jordan Crouse
Cc: freedreno, Jean-Philippe Brucker, linux-arm-msm, Joerg Roedel,
Will Deacon, Linux Kernel Mailing List,
list@263.net:IOMMU DRIVERS <iommu@lists.linux-foundation.org>, Joerg Roedel <joro@8bytes.org>,,
dianders, Kristian H. Kristensen, Robin Murphy,
moderated list:ARM/FREESCALE IMX / MXC ARM ARCHITECTURE, baolu.lu
On Fri, Mar 1, 2019 at 2:38 PM Jordan Crouse <jcrouse@codeaurora.org> wrote:
>
> Add support for a split pagetable (TTBR0/TTBR1) scheme for
> arm-smmu-v2. If split pagetables are enabled, create a
> pagetable for TTBR1 and set up the sign extension bit so
> that all IOVAs with that bit set are mapped and translated
> from the TTBR1 pagetable.
>
> Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
> ---
>
> drivers/iommu/arm-smmu-regs.h | 18 +++++
> drivers/iommu/arm-smmu.c | 149 +++++++++++++++++++++++++++++++++++++----
> drivers/iommu/io-pgtable-arm.c | 3 +-
> 3 files changed, 154 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h
> index a1226e4..56f9709 100644
> --- a/drivers/iommu/arm-smmu-regs.h
> +++ b/drivers/iommu/arm-smmu-regs.h
> @@ -193,7 +193,25 @@ enum arm_smmu_s2cr_privcfg {
> #define RESUME_RETRY (0 << 0)
> #define RESUME_TERMINATE (1 << 0)
>
> +#define TTBCR_EPD1 (1 << 23)
> +#define TTBCR_T1SZ_SHIFT 16
> +#define TTBCR_IRGN1_SHIFT 24
> +#define TTBCR_ORGN1_SHIFT 26
> +#define TTBCR_RGN_WBWA 1
> +#define TTBCR_SH1_SHIFT 28
> +#define TTBCR_SH_IS 3
> +
> +#define TTBCR_TG1_16K (1 << 30)
> +#define TTBCR_TG1_4K (2 << 30)
> +#define TTBCR_TG1_64K (3 << 30)
> +
> #define TTBCR2_SEP_SHIFT 15
> +#define TTBCR2_SEP_31 (0x0 << TTBCR2_SEP_SHIFT)
> +#define TTBCR2_SEP_35 (0x1 << TTBCR2_SEP_SHIFT)
> +#define TTBCR2_SEP_39 (0x2 << TTBCR2_SEP_SHIFT)
> +#define TTBCR2_SEP_41 (0x3 << TTBCR2_SEP_SHIFT)
> +#define TTBCR2_SEP_43 (0x4 << TTBCR2_SEP_SHIFT)
> +#define TTBCR2_SEP_47 (0x5 << TTBCR2_SEP_SHIFT)
> #define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT)
> #define TTBCR2_AS (1 << 4)
>
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index af18a7e..05eb126 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -151,6 +151,7 @@ struct arm_smmu_cb {
> u32 tcr[2];
> u32 mair[2];
> struct arm_smmu_cfg *cfg;
> + u64 split_table_mask;
> };
>
> struct arm_smmu_master_cfg {
> @@ -208,6 +209,7 @@ struct arm_smmu_device {
> unsigned long va_size;
> unsigned long ipa_size;
> unsigned long pa_size;
> + unsigned long ubs_size;
> unsigned long pgsize_bitmap;
>
> u32 num_global_irqs;
> @@ -252,13 +254,14 @@ enum arm_smmu_domain_stage {
>
> struct arm_smmu_domain {
> struct arm_smmu_device *smmu;
> - struct io_pgtable_ops *pgtbl_ops;
> + struct io_pgtable_ops *pgtbl_ops[2];
> const struct iommu_gather_ops *tlb_ops;
> struct arm_smmu_cfg cfg;
> enum arm_smmu_domain_stage stage;
> bool non_strict;
> struct mutex init_mutex; /* Protects smmu pointer */
> spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
> + u32 attributes;
> struct iommu_domain domain;
> };
>
> @@ -618,6 +621,69 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
> return IRQ_HANDLED;
> }
>
> +static void arm_smmu_init_ttbr1(struct arm_smmu_domain *smmu_domain,
> + struct io_pgtable_cfg *pgtbl_cfg)
> +{
> + struct arm_smmu_device *smmu = smmu_domain->smmu;
> + struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> + struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
> + int pgsize = 1 << __ffs(pgtbl_cfg->pgsize_bitmap);
> +
> + /* Enable speculative walks through the TTBR1 */
> + cb->tcr[0] &= ~TTBCR_EPD1;
> +
> + cb->tcr[0] |= TTBCR_SH_IS << TTBCR_SH1_SHIFT;
> + cb->tcr[0] |= TTBCR_RGN_WBWA << TTBCR_IRGN1_SHIFT;
> + cb->tcr[0] |= TTBCR_RGN_WBWA << TTBCR_ORGN1_SHIFT;
> +
> + switch (pgsize) {
> + case SZ_4K:
> + cb->tcr[0] |= TTBCR_TG1_4K;
> + break;
> + case SZ_16K:
> + cb->tcr[0] |= TTBCR_TG1_16K;
> + break;
> + case SZ_64K:
> + cb->tcr[0] |= TTBCR_TG1_64K;
> + break;
> + }
> +
> + cb->tcr[0] |= (64ULL - smmu->va_size) << TTBCR_T1SZ_SHIFT;
> +
> + /* Clear the existing SEP configuration */
> + cb->tcr[1] &= ~TTBCR2_SEP_UPSTREAM;
> +
> + /* Set up the sign extend bit */
> + switch (smmu->va_size) {
> + case 32:
> + cb->tcr[1] |= TTBCR2_SEP_31;
> + cb->split_table_mask = (1ULL << 31);
> + break;
> + case 36:
> + cb->tcr[1] |= TTBCR2_SEP_35;
> + cb->split_table_mask = (1ULL << 35);
> + break;
> + case 40:
> + cb->tcr[1] |= TTBCR2_SEP_39;
> + cb->split_table_mask = (1ULL << 39);
> + break;
> + case 42:
> + cb->tcr[1] |= TTBCR2_SEP_41;
> + cb->split_table_mask = (1ULL << 41);
> + break;
> + case 44:
> + cb->tcr[1] |= TTBCR2_SEP_43;
> + cb->split_table_mask = (1ULL << 43);
> + break;
> + case 48:
> + cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
> + cb->split_table_mask = (1ULL << 48);
> + }
> +
> + cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
> + cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
> +}
> +
> static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
> struct io_pgtable_cfg *pgtbl_cfg)
> {
> @@ -650,8 +716,12 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
> } else {
> cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
> cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
> - cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
> - cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
> +
> + /*
> + * Set TTBR1 to empty by default - it will get
> + * programmed later if it is enabled
> + */
> + cb->ttbr[1] = (u64)cfg->asid << TTBRn_ASID_SHIFT;
> }
> } else {
> cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
> @@ -760,11 +830,13 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> {
> int irq, start, ret = 0;
> unsigned long ias, oas;
> - struct io_pgtable_ops *pgtbl_ops;
> + struct io_pgtable_ops *pgtbl_ops[2];
> struct io_pgtable_cfg pgtbl_cfg;
> enum io_pgtable_fmt fmt;
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> + bool split_tables =
> + (smmu_domain->attributes & (1 << DOMAIN_ATTR_SPLIT_TABLES));
Maybe use BIT(DOMAIN_ATTR_SPLIT_TABLES) here instead of the open-coded shift?
>
> mutex_lock(&smmu_domain->init_mutex);
> if (smmu_domain->smmu)
> @@ -794,8 +866,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> *
> * Note that you can't actually request stage-2 mappings.
> */
> - if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
> + if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) {
> smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
> + /* FIXME: fail instead? */
> + split_tables = false;
Yeah, I think we want to return an error somewhere if this is not supported.
I think we want to fall back to not using per-process pagetables if
this fails.
BR,
-R
> + }
> if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
> smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
>
> @@ -812,8 +887,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
> !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
> (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
> - (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
> + (smmu_domain->stage == ARM_SMMU_DOMAIN_S1)) {
> + /* FIXME: fail instead? */
> + split_tables = false;
> cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
> + }
> if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
> (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
> ARM_SMMU_FEAT_FMT_AARCH64_16K |
> @@ -903,8 +981,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
>
> smmu_domain->smmu = smmu;
> - pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
> - if (!pgtbl_ops) {
> + pgtbl_ops[0] = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
> + if (!pgtbl_ops[0]) {
> ret = -ENOMEM;
> goto out_clear_smmu;
> }
> @@ -916,6 +994,22 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
>
> /* Initialise the context bank with our page table cfg */
> arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
> +
> + pgtbl_ops[1] = NULL;
> +
> + if (split_tables) {
> + /* FIXME: I think it is safe to reuse pgtbl_cfg here */
> + pgtbl_ops[1] = alloc_io_pgtable_ops(fmt, &pgtbl_cfg,
> + smmu_domain);
> + if (!pgtbl_ops[1]) {
> + free_io_pgtable_ops(pgtbl_ops[0]);
> + ret = -ENOMEM;
> + goto out_clear_smmu;
> + }
> +
> + arm_smmu_init_ttbr1(smmu_domain, &pgtbl_cfg);
> + }
> +
> arm_smmu_write_context_bank(smmu, cfg->cbndx);
>
> /*
> @@ -934,7 +1028,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> mutex_unlock(&smmu_domain->init_mutex);
>
> /* Publish page table ops for map/unmap */
> - smmu_domain->pgtbl_ops = pgtbl_ops;
> + smmu_domain->pgtbl_ops[0] = pgtbl_ops[0];
> + smmu_domain->pgtbl_ops[1] = pgtbl_ops[1];
> +
> return 0;
>
> out_clear_smmu:
> @@ -970,7 +1066,9 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
> devm_free_irq(smmu->dev, irq, domain);
> }
>
> - free_io_pgtable_ops(smmu_domain->pgtbl_ops);
> + free_io_pgtable_ops(smmu_domain->pgtbl_ops[0]);
> + free_io_pgtable_ops(smmu_domain->pgtbl_ops[1]);
> +
> __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
>
> arm_smmu_rpm_put(smmu);
> @@ -1285,10 +1383,23 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> return ret;
> }
>
> +static struct io_pgtable_ops *
> +arm_smmu_get_pgtbl_ops(struct iommu_domain *domain, unsigned long iova)
> +{
> + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> + struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> + struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
> +
> + if (iova & cb->split_table_mask)
> + return smmu_domain->pgtbl_ops[1];
> +
> + return smmu_domain->pgtbl_ops[0];
> +}
> +
> static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
> phys_addr_t paddr, size_t size, int prot)
> {
> - struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
> + struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
> struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
> int ret;
>
> @@ -1305,7 +1416,7 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
> static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
> size_t size)
> {
> - struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
> + struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
> struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
> size_t ret;
>
> @@ -1349,7 +1460,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> struct arm_smmu_device *smmu = smmu_domain->smmu;
> struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> - struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
> + struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
> struct device *dev = smmu->dev;
> void __iomem *cb_base;
> u32 tmp;
> @@ -1397,7 +1508,7 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
> dma_addr_t iova)
> {
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> - struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> + struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
>
> if (domain->type == IOMMU_DOMAIN_IDENTITY)
> return iova;
> @@ -1584,6 +1695,11 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
> case DOMAIN_ATTR_NESTING:
> *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
> return 0;
> + case DOMAIN_ATTR_SPLIT_TABLES:
> + *((int *)data) =
> + !!(smmu_domain->attributes &
> + (1 << DOMAIN_ATTR_SPLIT_TABLES));
> + return 0;
> default:
> return -ENODEV;
> }
> @@ -1624,6 +1740,11 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
> else
> smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
> break;
> + case DOMAIN_ATTR_SPLIT_TABLES:
> + if (*((int *)data))
> + smmu_domain->attributes |=
> + (1 << DOMAIN_ATTR_SPLIT_TABLES);
> + break;
> default:
> ret = -ENODEV;
> }
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index 237cacd..dc9fb2e 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -475,8 +475,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
> if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
> return 0;
>
> - if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
> - paddr >= (1ULL << data->iop.cfg.oas)))
> + if (WARN_ON(paddr >= (1ULL << data->iop.cfg.oas)))
> return -ERANGE;
>
> prot = arm_lpae_prot_to_pte(data, iommu_prot);
> --
> 2.7.4
>
> _______________________________________________
> Freedreno mailing list
> Freedreno@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/freedreno
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Freedreno] [RFC PATCH v1 02/15] iommu/arm-smmu: Add split pagetable support for arm-smmu-v2
@ 2019-03-01 20:25 ` Rob Clark
0 siblings, 0 replies; 46+ messages in thread
From: Rob Clark @ 2019-03-01 20:25 UTC (permalink / raw)
To: Jordan Crouse
Cc: Jean-Philippe Brucker, linux-arm-msm, Joerg Roedel, Will Deacon,
Linux Kernel Mailing List,
list@263.net:IOMMU DRIVERS <iommu@lists.linux-foundation.org>, Joerg Roedel <joro@8bytes.org>, ,
dianders, Kristian H. Kristensen, baolu.lu, freedreno,
moderated list:ARM/FREESCALE IMX / MXC ARM ARCHITECTURE,
Robin Murphy
On Fri, Mar 1, 2019 at 2:38 PM Jordan Crouse <jcrouse@codeaurora.org> wrote:
>
> Add support for a split pagetable (TTBR0/TTBR1) scheme for
> arm-smmu-v2. If split pagetables are enabled, create a
> pagetable for TTBR1 and set up the sign extension bit so
> that all IOVAs with that bit set are mapped and translated
> from the TTBR1 pagetable.
>
> Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
> ---
>
> drivers/iommu/arm-smmu-regs.h | 18 +++++
> drivers/iommu/arm-smmu.c | 149 +++++++++++++++++++++++++++++++++++++----
> drivers/iommu/io-pgtable-arm.c | 3 +-
> 3 files changed, 154 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h
> index a1226e4..56f9709 100644
> --- a/drivers/iommu/arm-smmu-regs.h
> +++ b/drivers/iommu/arm-smmu-regs.h
> @@ -193,7 +193,25 @@ enum arm_smmu_s2cr_privcfg {
> #define RESUME_RETRY (0 << 0)
> #define RESUME_TERMINATE (1 << 0)
>
> +#define TTBCR_EPD1 (1 << 23)
> +#define TTBCR_T1SZ_SHIFT 16
> +#define TTBCR_IRGN1_SHIFT 24
> +#define TTBCR_ORGN1_SHIFT 26
> +#define TTBCR_RGN_WBWA 1
> +#define TTBCR_SH1_SHIFT 28
> +#define TTBCR_SH_IS 3
> +
> +#define TTBCR_TG1_16K (1 << 30)
> +#define TTBCR_TG1_4K (2 << 30)
> +#define TTBCR_TG1_64K (3 << 30)
> +
> #define TTBCR2_SEP_SHIFT 15
> +#define TTBCR2_SEP_31 (0x0 << TTBCR2_SEP_SHIFT)
> +#define TTBCR2_SEP_35 (0x1 << TTBCR2_SEP_SHIFT)
> +#define TTBCR2_SEP_39 (0x2 << TTBCR2_SEP_SHIFT)
> +#define TTBCR2_SEP_41 (0x3 << TTBCR2_SEP_SHIFT)
> +#define TTBCR2_SEP_43 (0x4 << TTBCR2_SEP_SHIFT)
> +#define TTBCR2_SEP_47 (0x5 << TTBCR2_SEP_SHIFT)
> #define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT)
> #define TTBCR2_AS (1 << 4)
>
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index af18a7e..05eb126 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -151,6 +151,7 @@ struct arm_smmu_cb {
> u32 tcr[2];
> u32 mair[2];
> struct arm_smmu_cfg *cfg;
> + u64 split_table_mask;
> };
>
> struct arm_smmu_master_cfg {
> @@ -208,6 +209,7 @@ struct arm_smmu_device {
> unsigned long va_size;
> unsigned long ipa_size;
> unsigned long pa_size;
> + unsigned long ubs_size;
> unsigned long pgsize_bitmap;
>
> u32 num_global_irqs;
> @@ -252,13 +254,14 @@ enum arm_smmu_domain_stage {
>
> struct arm_smmu_domain {
> struct arm_smmu_device *smmu;
> - struct io_pgtable_ops *pgtbl_ops;
> + struct io_pgtable_ops *pgtbl_ops[2];
> const struct iommu_gather_ops *tlb_ops;
> struct arm_smmu_cfg cfg;
> enum arm_smmu_domain_stage stage;
> bool non_strict;
> struct mutex init_mutex; /* Protects smmu pointer */
> spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
> + u32 attributes;
> struct iommu_domain domain;
> };
>
> @@ -618,6 +621,69 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
> return IRQ_HANDLED;
> }
>
> +static void arm_smmu_init_ttbr1(struct arm_smmu_domain *smmu_domain,
> + struct io_pgtable_cfg *pgtbl_cfg)
> +{
> + struct arm_smmu_device *smmu = smmu_domain->smmu;
> + struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> + struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
> + int pgsize = 1 << __ffs(pgtbl_cfg->pgsize_bitmap);
> +
> + /* Enable speculative walks through the TTBR1 */
> + cb->tcr[0] &= ~TTBCR_EPD1;
> +
> + cb->tcr[0] |= TTBCR_SH_IS << TTBCR_SH1_SHIFT;
> + cb->tcr[0] |= TTBCR_RGN_WBWA << TTBCR_IRGN1_SHIFT;
> + cb->tcr[0] |= TTBCR_RGN_WBWA << TTBCR_ORGN1_SHIFT;
> +
> + switch (pgsize) {
> + case SZ_4K:
> + cb->tcr[0] |= TTBCR_TG1_4K;
> + break;
> + case SZ_16K:
> + cb->tcr[0] |= TTBCR_TG1_16K;
> + break;
> + case SZ_64K:
> + cb->tcr[0] |= TTBCR_TG1_64K;
> + break;
> + }
> +
> + cb->tcr[0] |= (64ULL - smmu->va_size) << TTBCR_T1SZ_SHIFT;
> +
> + /* Clear the existing SEP configuration */
> + cb->tcr[1] &= ~TTBCR2_SEP_UPSTREAM;
> +
> + /* Set up the sign extend bit */
> + switch (smmu->va_size) {
> + case 32:
> + cb->tcr[1] |= TTBCR2_SEP_31;
> + cb->split_table_mask = (1ULL << 31);
> + break;
> + case 36:
> + cb->tcr[1] |= TTBCR2_SEP_35;
> + cb->split_table_mask = (1ULL << 35);
> + break;
> + case 40:
> + cb->tcr[1] |= TTBCR2_SEP_39;
> + cb->split_table_mask = (1ULL << 39);
> + break;
> + case 42:
> + cb->tcr[1] |= TTBCR2_SEP_41;
> + cb->split_table_mask = (1ULL << 41);
> + break;
> + case 44:
> + cb->tcr[1] |= TTBCR2_SEP_43;
> + cb->split_table_mask = (1ULL << 43);
> + break;
> + case 48:
> + cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
> + cb->split_table_mask = (1ULL << 48);
> + }
> +
> + cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
> + cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
> +}
> +
> static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
> struct io_pgtable_cfg *pgtbl_cfg)
> {
> @@ -650,8 +716,12 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
> } else {
> cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
> cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
> - cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
> - cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
> +
> + /*
> + * Set TTBR1 to empty by default - it will get
> + * programmed later if it is enabled
> + */
> + cb->ttbr[1] = (u64)cfg->asid << TTBRn_ASID_SHIFT;
> }
> } else {
> cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
> @@ -760,11 +830,13 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> {
> int irq, start, ret = 0;
> unsigned long ias, oas;
> - struct io_pgtable_ops *pgtbl_ops;
> + struct io_pgtable_ops *pgtbl_ops[2];
> struct io_pgtable_cfg pgtbl_cfg;
> enum io_pgtable_fmt fmt;
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> + bool split_tables =
> + (smmu_domain->attributes & (1 << DOMAIN_ATTR_SPLIT_TABLES));
BIT(DOMAIN_ATTR_SPLIT_TABLES) ?
>
> mutex_lock(&smmu_domain->init_mutex);
> if (smmu_domain->smmu)
> @@ -794,8 +866,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> *
> * Note that you can't actually request stage-2 mappings.
> */
> - if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
> + if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) {
> smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
> + /* FIXME: fail instead? */
> + split_tables = false;
yeah, I think we want to return an error somewhere if not supported.
I think we want to fall back to not using per-process pagetables if
this fails.
BR,
-R
> + }
> if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
> smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
>
> @@ -812,8 +887,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
> !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
> (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
> - (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
> + (smmu_domain->stage == ARM_SMMU_DOMAIN_S1)) {
> + /* FIXME: fail instead? */
> + split_tables = false;
> cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
> + }
> if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
> (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
> ARM_SMMU_FEAT_FMT_AARCH64_16K |
> @@ -903,8 +981,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
>
> smmu_domain->smmu = smmu;
> - pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
> - if (!pgtbl_ops) {
> + pgtbl_ops[0] = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
> + if (!pgtbl_ops[0]) {
> ret = -ENOMEM;
> goto out_clear_smmu;
> }
> @@ -916,6 +994,22 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
>
> /* Initialise the context bank with our page table cfg */
> arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
> +
> + pgtbl_ops[1] = NULL;
> +
> + if (split_tables) {
> + /* FIXME: I think it is safe to reuse pgtbl_cfg here */
> + pgtbl_ops[1] = alloc_io_pgtable_ops(fmt, &pgtbl_cfg,
> + smmu_domain);
> + if (!pgtbl_ops[1]) {
> + free_io_pgtable_ops(pgtbl_ops[0]);
> + ret = -ENOMEM;
> + goto out_clear_smmu;
> + }
> +
> + arm_smmu_init_ttbr1(smmu_domain, &pgtbl_cfg);
> + }
> +
> arm_smmu_write_context_bank(smmu, cfg->cbndx);
>
> /*
> @@ -934,7 +1028,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> mutex_unlock(&smmu_domain->init_mutex);
>
> /* Publish page table ops for map/unmap */
> - smmu_domain->pgtbl_ops = pgtbl_ops;
> + smmu_domain->pgtbl_ops[0] = pgtbl_ops[0];
> + smmu_domain->pgtbl_ops[1] = pgtbl_ops[1];
> +
> return 0;
>
> out_clear_smmu:
> @@ -970,7 +1066,9 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
> devm_free_irq(smmu->dev, irq, domain);
> }
>
> - free_io_pgtable_ops(smmu_domain->pgtbl_ops);
> + free_io_pgtable_ops(smmu_domain->pgtbl_ops[0]);
> + free_io_pgtable_ops(smmu_domain->pgtbl_ops[1]);
> +
> __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
>
> arm_smmu_rpm_put(smmu);
> @@ -1285,10 +1383,23 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> return ret;
> }
>
> +static struct io_pgtable_ops *
> +arm_smmu_get_pgtbl_ops(struct iommu_domain *domain, unsigned long iova)
> +{
> + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> + struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> + struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
> +
> + if (iova & cb->split_table_mask)
> + return smmu_domain->pgtbl_ops[1];
> +
> + return smmu_domain->pgtbl_ops[0];
> +}
> +
> static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
> phys_addr_t paddr, size_t size, int prot)
> {
> - struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
> + struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
> struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
> int ret;
>
> @@ -1305,7 +1416,7 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
> static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
> size_t size)
> {
> - struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
> + struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
> struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
> size_t ret;
>
> @@ -1349,7 +1460,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> struct arm_smmu_device *smmu = smmu_domain->smmu;
> struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> - struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
> + struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
> struct device *dev = smmu->dev;
> void __iomem *cb_base;
> u32 tmp;
> @@ -1397,7 +1508,7 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
> dma_addr_t iova)
> {
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> - struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> + struct io_pgtable_ops *ops = arm_smmu_get_pgtbl_ops(domain, iova);
>
> if (domain->type == IOMMU_DOMAIN_IDENTITY)
> return iova;
> @@ -1584,6 +1695,11 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
> case DOMAIN_ATTR_NESTING:
> *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
> return 0;
> + case DOMAIN_ATTR_SPLIT_TABLES:
> + *((int *)data) =
> + !!(smmu_domain->attributes &
> + (1 << DOMAIN_ATTR_SPLIT_TABLES));
> + return 0;
> default:
> return -ENODEV;
> }
> @@ -1624,6 +1740,11 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
> else
> smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
> break;
> + case DOMAIN_ATTR_SPLIT_TABLES:
> + if (*((int *)data))
> + smmu_domain->attributes |=
> + (1 << DOMAIN_ATTR_SPLIT_TABLES);
> + break;
> default:
> ret = -ENODEV;
> }
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index 237cacd..dc9fb2e 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -475,8 +475,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
> if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
> return 0;
>
> - if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
> - paddr >= (1ULL << data->iop.cfg.oas)))
> + if (WARN_ON(paddr >= (1ULL << data->iop.cfg.oas)))
> return -ERANGE;
>
> prot = arm_lpae_prot_to_pte(data, iommu_prot);
> --
> 2.7.4
>
> _______________________________________________
> Freedreno mailing list
> Freedreno@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/freedreno
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 46+ messages in thread
* [RFC PATCH v1 03/15] iommu/io-pgtable: Allow TLB operations to be optional
2019-03-01 19:38 ` Jordan Crouse
@ 2019-03-01 19:38 ` Jordan Crouse
-1 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: jean-philippe.brucker-5wv7dgnIgG8,
linux-arm-msm-u79uwXL29TY76Z2rM5mHXA, Joerg Roedel,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
dianders-uWgjrcJnOmJ4cg9Nei1l7Q, hoegsberg-hpIqsD4AKlfQT0dZR+AlfA,
baolu.lu-VuQAYsv1563Yd54FQh9/CA
An upcoming change to arm-smmu will add auxiliary domains that will allow
a leaf driver to create and map additional pagetables for device
specific uses. By definition aux arm-smmu domains will not be allowed
to touch the hardware directly so allow for the TLB operations for
a given pagetable configuration to be NULL just in case the caller
accidentally calls for a flush with the wrong device.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/iommu/io-pgtable.h | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index 47d5ae5..fbfd3c9 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -178,18 +178,22 @@ struct io_pgtable {
static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)
{
- iop->cfg.tlb->tlb_flush_all(iop->cookie);
+ if (iop->cfg.tlb)
+ iop->cfg.tlb->tlb_flush_all(iop->cookie);
}
static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop,
unsigned long iova, size_t size, size_t granule, bool leaf)
{
- iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf, iop->cookie);
+ if (iop->cfg.tlb)
+ iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf,
+ iop->cookie);
}
static inline void io_pgtable_tlb_sync(struct io_pgtable *iop)
{
- iop->cfg.tlb->tlb_sync(iop->cookie);
+ if (iop->cfg.tlb)
+ iop->cfg.tlb->tlb_sync(iop->cookie);
}
/**
--
2.7.4
_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno
^ permalink raw reply related [flat|nested] 46+ messages in thread* [RFC PATCH v1 03/15] iommu/io-pgtable: Allow TLB operations to be optional
@ 2019-03-01 19:38 ` Jordan Crouse
0 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno
Cc: jean-philippe.brucker, linux-arm-msm, dianders, hoegsberg,
baolu.lu, iommu, Joerg Roedel, linux-kernel
An upcoming change to arm-smmu will add auxiliary domains that will allow
a leaf driver to create and map additional pagetables for device
specific uses. By definition aux arm-smmu domains will not be allowed
to touch the hardware directly so allow for the TLB operations for
a given pagetable configuration to be NULL just in case the caller
accidentally calls for a flush with the wrong device.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/iommu/io-pgtable.h | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index 47d5ae5..fbfd3c9 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -178,18 +178,22 @@ struct io_pgtable {
static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)
{
- iop->cfg.tlb->tlb_flush_all(iop->cookie);
+ if (iop->cfg.tlb)
+ iop->cfg.tlb->tlb_flush_all(iop->cookie);
}
static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop,
unsigned long iova, size_t size, size_t granule, bool leaf)
{
- iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf, iop->cookie);
+ if (iop->cfg.tlb)
+ iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf,
+ iop->cookie);
}
static inline void io_pgtable_tlb_sync(struct io_pgtable *iop)
{
- iop->cfg.tlb->tlb_sync(iop->cookie);
+ if (iop->cfg.tlb)
+ iop->cfg.tlb->tlb_sync(iop->cookie);
}
/**
--
2.7.4
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [RFC PATCH v1 04/15] iommu: Add DOMAIN_ATTR_PTBASE
2019-03-01 19:38 ` Jordan Crouse
@ 2019-03-01 19:38 ` Jordan Crouse
-1 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: jean-philippe.brucker-5wv7dgnIgG8,
linux-arm-msm-u79uwXL29TY76Z2rM5mHXA, Joerg Roedel,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
dianders-uWgjrcJnOmJ4cg9Nei1l7Q, hoegsberg-hpIqsD4AKlfQT0dZR+AlfA,
baolu.lu-VuQAYsv1563Yd54FQh9/CA
Add an attribute to return the base address of the pagetable. This is used
by auxiliary domains from arm-smmu to return the address of the pagetable
to the leaf driver so that it can set the appropriate pagetable through
its own means.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
include/linux/iommu.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 3f2250b..dc60a71 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -126,6 +126,7 @@ enum iommu_attr {
DOMAIN_ATTR_NESTING, /* two stages of translation */
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
DOMAIN_ATTR_SPLIT_TABLES,
+ DOMAIN_ATTR_PTBASE,
DOMAIN_ATTR_MAX,
};
--
2.7.4
_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno
^ permalink raw reply related [flat|nested] 46+ messages in thread* [RFC PATCH v1 04/15] iommu: Add DOMAIN_ATTR_PTBASE
@ 2019-03-01 19:38 ` Jordan Crouse
0 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno
Cc: jean-philippe.brucker, linux-arm-msm, dianders, hoegsberg,
baolu.lu, iommu, Joerg Roedel, linux-kernel
Add an attribute to return the base address of the pagetable. This is used
by auxiliary domains from arm-smmu to return the address of the pagetable
to the leaf driver so that it can set the appropriate pagetable through
its own means.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
include/linux/iommu.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 3f2250b..dc60a71 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -126,6 +126,7 @@ enum iommu_attr {
DOMAIN_ATTR_NESTING, /* two stages of translation */
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
DOMAIN_ATTR_SPLIT_TABLES,
+ DOMAIN_ATTR_PTBASE,
DOMAIN_ATTR_MAX,
};
--
2.7.4
^ permalink raw reply related [flat|nested] 46+ messages in thread* Re: [RFC PATCH v1 04/15] iommu: Add DOMAIN_ATTR_PTBASE
2019-03-01 19:38 ` Jordan Crouse
(?)
@ 2019-03-18 9:53 ` Joerg Roedel
[not found] ` <20190318095321.GA5417-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
-1 siblings, 1 reply; 46+ messages in thread
From: Joerg Roedel @ 2019-03-18 9:53 UTC (permalink / raw)
To: Jordan Crouse
Cc: freedreno, jean-philippe.brucker, linux-arm-msm, dianders,
hoegsberg, baolu.lu, iommu, linux-kernel
On Fri, Mar 01, 2019 at 12:38:26PM -0700, Jordan Crouse wrote:
> Add an attribute to return the base address of the pagetable. This is used
> by auxiliary domains from arm-smmu to return the address of the pagetable
> to the leaf driver so that it can set the appropriate pagetable through
> it's own means.
What is this going to be used for? Page-table management is supposed to
happen in the arm-smmu driver and the gpu driver only makes changes
through iommu_map/iommu_unmap calls.
Regards,
Joerg
^ permalink raw reply [flat|nested] 46+ messages in thread
* [RFC PATCH v1 05/15] iommu/arm-smmu: Add auxiliary domain support for arm-smmuv2
2019-03-01 19:38 ` Jordan Crouse
(?)
@ 2019-03-01 19:38 ` Jordan Crouse
-1 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: jean-philippe.brucker-5wv7dgnIgG8,
linux-arm-msm-u79uwXL29TY76Z2rM5mHXA, Joerg Roedel, Will Deacon,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
dianders-uWgjrcJnOmJ4cg9Nei1l7Q, hoegsberg-hpIqsD4AKlfQT0dZR+AlfA,
Robin Murphy, linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
baolu.lu-VuQAYsv1563Yd54FQh9/CA
Support the new auxiliary domain API for arm-smmuv2 to initialize and
support multiple pagetables for a SMMU device. Since the smmu-v2 hardware
doesn't have any built in support for switching the pagetable base it is
left as an exercise to the caller to actually use the pagetable; aux
domains in the IOMMU driver are only preoccupied with creating and managing
the pagetable memory.
Following is a pseudo-code example of how a domain can be created:
/* Check to see if aux domains are supported */
if (iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX)) {
domain = iommu_domain_alloc(...);
if (iommu_aux_attach_device(domain, dev))
return FAIL;
/* Save the base address of the pagetable for use by the driver */
iommu_domain_get_attr(domain, DOMAIN_ATTR_PTBASE, &ptbase);
}
After this 'domain' can be used like any other iommu domain to map and
unmap iova addresses in the pagetable. The driver/hardware can be used
to switch the pagetable according to its own specific implementation.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/iommu/arm-smmu.c | 135 ++++++++++++++++++++++++++++++++++++++---------
1 file changed, 111 insertions(+), 24 deletions(-)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 05eb126..b7b508e 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -263,6 +263,8 @@ struct arm_smmu_domain {
spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
u32 attributes;
struct iommu_domain domain;
+ bool is_aux;
+ u64 ttbr0;
};
struct arm_smmu_option_prop {
@@ -874,6 +876,12 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ /* Aux domains can only be created for stage-1 tables */
+ if (smmu_domain->is_aux && smmu_domain->stage != ARM_SMMU_DOMAIN_S1) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
/*
* Choosing a suitable context format is even more fiddly. Until we
* grow some way for the caller to express a preference, and/or move
@@ -920,7 +928,10 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
ias = min(ias, 32UL);
oas = min(oas, 32UL);
}
- smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
+
+ /* aux domains shouldn't touch hardware so no TLB ops */
+ if (!smmu_domain->is_aux)
+ smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
break;
case ARM_SMMU_DOMAIN_NESTED:
/*
@@ -939,32 +950,42 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
ias = min(ias, 40UL);
oas = min(oas, 40UL);
}
- if (smmu->version == ARM_SMMU_V2)
- smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
- else
- smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
+
+ if (!smmu_domain->is_aux) {
+ if (smmu->version == ARM_SMMU_V2)
+ smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
+ else
+ smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
+ }
break;
default:
ret = -EINVAL;
goto out_unlock;
}
- ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
- smmu->num_context_banks);
- if (ret < 0)
- goto out_unlock;
- cfg->cbndx = ret;
- if (smmu->version < ARM_SMMU_V2) {
- cfg->irptndx = atomic_inc_return(&smmu->irptndx);
- cfg->irptndx %= smmu->num_context_irqs;
- } else {
- cfg->irptndx = cfg->cbndx;
- }
+ /*
+ * Aux domains will use the same context bank assigned to the master
+ * domain for the device
+ */
+ if (!smmu_domain->is_aux) {
+ ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
+ smmu->num_context_banks);
+ if (ret < 0)
+ goto out_unlock;
- if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
- cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
- else
- cfg->asid = cfg->cbndx + smmu->cavium_id_base;
+ cfg->cbndx = ret;
+ if (smmu->version < ARM_SMMU_V2) {
+ cfg->irptndx = atomic_inc_return(&smmu->irptndx);
+ cfg->irptndx %= smmu->num_context_irqs;
+ } else {
+ cfg->irptndx = cfg->cbndx;
+ }
+
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
+ cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
+ else
+ cfg->asid = cfg->cbndx + smmu->cavium_id_base;
+ }
pgtbl_cfg = (struct io_pgtable_cfg) {
.pgsize_bitmap = smmu->pgsize_bitmap,
@@ -987,16 +1008,26 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
goto out_clear_smmu;
}
+ /* Cache the TTBR0 for the aux domain */
+ smmu_domain->ttbr0 = pgtbl_cfg.arm_lpae_s1_cfg.ttbr[0];
+
/* Update the domain's page sizes to reflect the page table format */
domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
domain->geometry.aperture_end = (1UL << ias) - 1;
domain->geometry.force_aperture = true;
+ pgtbl_ops[1] = NULL;
+
+ /*
+ * aux domains don't use split tables or program the hardware so we're
+ * done setting it up
+ */
+ if (smmu_domain->is_aux)
+ goto end;
+
/* Initialise the context bank with our page table cfg */
arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
- pgtbl_ops[1] = NULL;
-
if (split_tables) {
/* FIXME: I think it is safe to reuse pgtbl_cfg here */
pgtbl_ops[1] = alloc_io_pgtable_ops(fmt, &pgtbl_cfg,
@@ -1018,13 +1049,15 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
*/
irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
- IRQF_SHARED, "arm-smmu-context-fault", domain);
+ IRQF_SHARED, "arm-smmu-context-fault",
+ domain);
if (ret < 0) {
dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
cfg->irptndx, irq);
cfg->irptndx = INVALID_IRPTNDX;
}
+end:
mutex_unlock(&smmu_domain->init_mutex);
/* Publish page table ops for map/unmap */
@@ -1050,6 +1083,12 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
return;
+ /* All we need to do for aux devices is destroy the pagetable */
+ if (smmu_domain->is_aux) {
+ free_io_pgtable_ops(smmu_domain->pgtbl_ops[0]);
+ return;
+ }
+
ret = arm_smmu_rpm_get(smmu);
if (ret < 0)
return;
@@ -1330,6 +1369,39 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
return 0;
}
+static bool arm_smmu_dev_has_feat(struct device *dev,
+ enum iommu_dev_features feat)
+{
+ /*
+ * FIXME: Should we do some hardware checking here, like to be sure this
+ * is a stage 1 and such?
+ */
+
+ /* Always allow aux domains */
+ if (feat == IOMMU_DEV_FEAT_AUX)
+ return true;
+
+ return false;
+}
+
+/* FIXME: Add stubs for dev_enable_feat and dev_disable_feat? */
+
+/* Set up a new aux domain and create a new pagetable with the same
+ * characteristics as the master
+ */
+static int arm_smmu_aux_attach_dev(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ smmu_domain->is_aux = true;
+
+ /* No power is needed because aux domain doesn't touch the hardware */
+ return arm_smmu_init_domain_context(domain, smmu);
+}
+
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
int ret;
@@ -1342,6 +1414,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
return -ENXIO;
}
+ /* FIXME: Reject unmanged domains since those should be aux? */
+
/*
* FIXME: The arch/arm DMA API code tries to attach devices to its own
* domains between of_xlate() and add_device() - we have no way to cope
@@ -1388,7 +1462,13 @@ arm_smmu_get_pgtbl_ops(struct iommu_domain *domain, unsigned long iova)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
+ struct arm_smmu_cb *cb;
+
+ /* quick escape for domains that don't have split pagetables enabled */
+ if (!smmu_domain->pgtbl_ops[1])
+ return smmu_domain->pgtbl_ops[0];
+
+ cb = &smmu_domain->smmu->cbs[cfg->cbndx];
if (iova & cb->split_table_mask)
return smmu_domain->pgtbl_ops[1];
@@ -1700,6 +1780,11 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
!!(smmu_domain->attributes &
(1 << DOMAIN_ATTR_SPLIT_TABLES));
return 0;
+ case DOMAIN_ATTR_PTBASE:
+ if (!smmu_domain->is_aux)
+ return -ENODEV;
+ *((u64 *)data) = smmu_domain->ttbr0;
+ return 0;
default:
return -ENODEV;
}
@@ -1810,7 +1895,9 @@ static struct iommu_ops arm_smmu_ops = {
.capable = arm_smmu_capable,
.domain_alloc = arm_smmu_domain_alloc,
.domain_free = arm_smmu_domain_free,
+ .dev_has_feat = arm_smmu_dev_has_feat,
.attach_dev = arm_smmu_attach_dev,
+ .aux_attach_dev = arm_smmu_aux_attach_dev,
.map = arm_smmu_map,
.unmap = arm_smmu_unmap,
.flush_iotlb_all = arm_smmu_flush_iotlb_all,
--
2.7.4
_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno
^ permalink raw reply related [flat|nested] 46+ messages in thread* [RFC PATCH v1 05/15] iommu/arm-smmu: Add auxiliary domain support for arm-smmuv2
@ 2019-03-01 19:38 ` Jordan Crouse
0 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno
Cc: jean-philippe.brucker, linux-arm-msm, dianders, hoegsberg,
baolu.lu, linux-kernel, iommu, Robin Murphy, Will Deacon,
Joerg Roedel, linux-arm-kernel
Support the new auxiliary domain API for arm-smmuv2 to initialize and
support multiple pagetables for a SMMU device. Since the smmu-v2 hardware
doesn't have any built in support for switching the pagetable base it is
left as an exercise to the caller to actually use the pagetable; aux
domains in the IOMMU driver are only preoccupied with creating and managing
the pagetable memory.
Following is a pseudo-code example of how a domain can be created:
/* Check to see if aux domains are supported */
if (iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX)) {
domain = iommu_domain_alloc(...);
if (iommu_aux_attach_device(domain, dev))
return FAIL;
/* Save the base address of the pagetable for use by the driver */
iommu_domain_get_attr(domain, DOMAIN_ATTR_PTBASE, &ptbase);
}
After this 'domain' can be used like any other iommu domain to map and
unmap iova addresses in the pagetable. The driver/hardware can be used
to switch the pagetable according to its own specific implementation.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/iommu/arm-smmu.c | 135 ++++++++++++++++++++++++++++++++++++++---------
1 file changed, 111 insertions(+), 24 deletions(-)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 05eb126..b7b508e 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -263,6 +263,8 @@ struct arm_smmu_domain {
spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
u32 attributes;
struct iommu_domain domain;
+ bool is_aux;
+ u64 ttbr0;
};
struct arm_smmu_option_prop {
@@ -874,6 +876,12 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ /* Aux domains can only be created for stage-1 tables */
+ if (smmu_domain->is_aux && smmu_domain->stage != ARM_SMMU_DOMAIN_S1) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
/*
* Choosing a suitable context format is even more fiddly. Until we
* grow some way for the caller to express a preference, and/or move
@@ -920,7 +928,10 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
ias = min(ias, 32UL);
oas = min(oas, 32UL);
}
- smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
+
+ /* aux domains shouldn't touch hardware so no TLB ops */
+ if (!smmu_domain->is_aux)
+ smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
break;
case ARM_SMMU_DOMAIN_NESTED:
/*
@@ -939,32 +950,42 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
ias = min(ias, 40UL);
oas = min(oas, 40UL);
}
- if (smmu->version == ARM_SMMU_V2)
- smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
- else
- smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
+
+ if (!smmu_domain->is_aux) {
+ if (smmu->version == ARM_SMMU_V2)
+ smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
+ else
+ smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
+ }
break;
default:
ret = -EINVAL;
goto out_unlock;
}
- ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
- smmu->num_context_banks);
- if (ret < 0)
- goto out_unlock;
- cfg->cbndx = ret;
- if (smmu->version < ARM_SMMU_V2) {
- cfg->irptndx = atomic_inc_return(&smmu->irptndx);
- cfg->irptndx %= smmu->num_context_irqs;
- } else {
- cfg->irptndx = cfg->cbndx;
- }
+ /*
+ * Aux domains will use the same context bank assigned to the master
+ * domain for the device
+ */
+ if (!smmu_domain->is_aux) {
+ ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
+ smmu->num_context_banks);
+ if (ret < 0)
+ goto out_unlock;
- if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
- cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
- else
- cfg->asid = cfg->cbndx + smmu->cavium_id_base;
+ cfg->cbndx = ret;
+ if (smmu->version < ARM_SMMU_V2) {
+ cfg->irptndx = atomic_inc_return(&smmu->irptndx);
+ cfg->irptndx %= smmu->num_context_irqs;
+ } else {
+ cfg->irptndx = cfg->cbndx;
+ }
+
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
+ cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
+ else
+ cfg->asid = cfg->cbndx + smmu->cavium_id_base;
+ }
pgtbl_cfg = (struct io_pgtable_cfg) {
.pgsize_bitmap = smmu->pgsize_bitmap,
@@ -987,16 +1008,26 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
goto out_clear_smmu;
}
+ /* Cache the TTBR0 for the aux domain */
+ smmu_domain->ttbr0 = pgtbl_cfg.arm_lpae_s1_cfg.ttbr[0];
+
/* Update the domain's page sizes to reflect the page table format */
domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
domain->geometry.aperture_end = (1UL << ias) - 1;
domain->geometry.force_aperture = true;
+ pgtbl_ops[1] = NULL;
+
+ /*
+ * aux domains don't use split tables or program the hardware so we're
+ * done setting it up
+ */
+ if (smmu_domain->is_aux)
+ goto end;
+
/* Initialise the context bank with our page table cfg */
arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
- pgtbl_ops[1] = NULL;
-
if (split_tables) {
/* FIXME: I think it is safe to reuse pgtbl_cfg here */
pgtbl_ops[1] = alloc_io_pgtable_ops(fmt, &pgtbl_cfg,
@@ -1018,13 +1049,15 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
*/
irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
- IRQF_SHARED, "arm-smmu-context-fault", domain);
+ IRQF_SHARED, "arm-smmu-context-fault",
+ domain);
if (ret < 0) {
dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
cfg->irptndx, irq);
cfg->irptndx = INVALID_IRPTNDX;
}
+end:
mutex_unlock(&smmu_domain->init_mutex);
/* Publish page table ops for map/unmap */
@@ -1050,6 +1083,12 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
return;
+ /* All we need to do for aux devices is destroy the pagetable */
+ if (smmu_domain->is_aux) {
+ free_io_pgtable_ops(smmu_domain->pgtbl_ops[0]);
+ return;
+ }
+
ret = arm_smmu_rpm_get(smmu);
if (ret < 0)
return;
@@ -1330,6 +1369,39 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
return 0;
}
+static bool arm_smmu_dev_has_feat(struct device *dev,
+ enum iommu_dev_features feat)
+{
+ /*
+ * FIXME: Should we do some hardware checking here, like to be sure this
+ * is a stage 1 and such?
+ */
+
+ /* Always allow aux domains */
+ if (feat == IOMMU_DEV_FEAT_AUX)
+ return true;
+
+ return false;
+}
+
+/* FIXME: Add stubs for dev_enable_feat and dev_disable_feat? */
+
+/* Set up a new aux domain and create a new pagetable with the same
+ * characteristics as the master
+ */
+static int arm_smmu_aux_attach_dev(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ smmu_domain->is_aux = true;
+
+ /* No power is needed because aux domain doesn't touch the hardware */
+ return arm_smmu_init_domain_context(domain, smmu);
+}
+
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
int ret;
@@ -1342,6 +1414,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
return -ENXIO;
}
+ /* FIXME: Reject unmanaged domains since those should be aux? */
+
/*
* FIXME: The arch/arm DMA API code tries to attach devices to its own
* domains between of_xlate() and add_device() - we have no way to cope
@@ -1388,7 +1462,13 @@ arm_smmu_get_pgtbl_ops(struct iommu_domain *domain, unsigned long iova)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
+ struct arm_smmu_cb *cb;
+
+ /* quick escape for domains that don't have split pagetables enabled */
+ if (!smmu_domain->pgtbl_ops[1])
+ return smmu_domain->pgtbl_ops[0];
+
+ cb = &smmu_domain->smmu->cbs[cfg->cbndx];
if (iova & cb->split_table_mask)
return smmu_domain->pgtbl_ops[1];
@@ -1700,6 +1780,11 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
!!(smmu_domain->attributes &
(1 << DOMAIN_ATTR_SPLIT_TABLES));
return 0;
+ case DOMAIN_ATTR_PTBASE:
+ if (!smmu_domain->is_aux)
+ return -ENODEV;
+ *((u64 *)data) = smmu_domain->ttbr0;
+ return 0;
default:
return -ENODEV;
}
@@ -1810,7 +1895,9 @@ static struct iommu_ops arm_smmu_ops = {
.capable = arm_smmu_capable,
.domain_alloc = arm_smmu_domain_alloc,
.domain_free = arm_smmu_domain_free,
+ .dev_has_feat = arm_smmu_dev_has_feat,
.attach_dev = arm_smmu_attach_dev,
+ .aux_attach_dev = arm_smmu_aux_attach_dev,
.map = arm_smmu_map,
.unmap = arm_smmu_unmap,
.flush_iotlb_all = arm_smmu_flush_iotlb_all,
--
2.7.4
^ permalink raw reply related [flat|nested] 46+ messages in thread* [RFC PATCH v1 05/15] iommu/arm-smmu: Add auxiliary domain support for arm-smmuv2
@ 2019-03-01 19:38 ` Jordan Crouse
0 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno
Cc: jean-philippe.brucker, linux-arm-msm, Joerg Roedel, Will Deacon,
linux-kernel, iommu, dianders, hoegsberg, Robin Murphy,
linux-arm-kernel, baolu.lu
Support the new auxiliary domain API for arm-smmuv2 to initialize and
support multiple pagetables for a SMMU device. Since the smmu-v2 hardware
doesn't have any built in support for switching the pagetable base it is
left as an exercise to the caller to actually use the pagetable; aux
domains in the IOMMU driver are only preoccupied with creating and managing
the pagetable memory.
Following is a pseudo code example of how a domain can be created
/* Check to see if aux domains are supported */
if (iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX)) {
domain = iommu_domain_alloc(...);
if (iommu_aux_attach_device(domain, dev))
return FAIL;
/* Save the base address of the pagetable for use by the driver */
iommu_domain_get_attr(domain, DOMAIN_ATTR_PTBASE, &ptbase);
}
After this 'domain' can be used like any other iommu domain to map and
unmap iova addresses in the pagetable. The driver/hardware can be used
to switch the pagetable according to its own specific implementation.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/iommu/arm-smmu.c | 135 ++++++++++++++++++++++++++++++++++++++---------
1 file changed, 111 insertions(+), 24 deletions(-)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 05eb126..b7b508e 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -263,6 +263,8 @@ struct arm_smmu_domain {
spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
u32 attributes;
struct iommu_domain domain;
+ bool is_aux;
+ u64 ttbr0;
};
struct arm_smmu_option_prop {
@@ -874,6 +876,12 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ /* Aux domains can only be created for stage-1 tables */
+ if (smmu_domain->is_aux && smmu_domain->stage != ARM_SMMU_DOMAIN_S1) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
/*
* Choosing a suitable context format is even more fiddly. Until we
* grow some way for the caller to express a preference, and/or move
@@ -920,7 +928,10 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
ias = min(ias, 32UL);
oas = min(oas, 32UL);
}
- smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
+
+ /* aux domains shouldn't touch hardware so no TLB ops */
+ if (!smmu_domain->is_aux)
+ smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
break;
case ARM_SMMU_DOMAIN_NESTED:
/*
@@ -939,32 +950,42 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
ias = min(ias, 40UL);
oas = min(oas, 40UL);
}
- if (smmu->version == ARM_SMMU_V2)
- smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
- else
- smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
+
+ if (!smmu_domain->is_aux) {
+ if (smmu->version == ARM_SMMU_V2)
+ smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
+ else
+ smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
+ }
break;
default:
ret = -EINVAL;
goto out_unlock;
}
- ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
- smmu->num_context_banks);
- if (ret < 0)
- goto out_unlock;
- cfg->cbndx = ret;
- if (smmu->version < ARM_SMMU_V2) {
- cfg->irptndx = atomic_inc_return(&smmu->irptndx);
- cfg->irptndx %= smmu->num_context_irqs;
- } else {
- cfg->irptndx = cfg->cbndx;
- }
+ /*
+ * Aux domains will use the same context bank assigned to the master
+ * domain for the device
+ */
+ if (!smmu_domain->is_aux) {
+ ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
+ smmu->num_context_banks);
+ if (ret < 0)
+ goto out_unlock;
- if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
- cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
- else
- cfg->asid = cfg->cbndx + smmu->cavium_id_base;
+ cfg->cbndx = ret;
+ if (smmu->version < ARM_SMMU_V2) {
+ cfg->irptndx = atomic_inc_return(&smmu->irptndx);
+ cfg->irptndx %= smmu->num_context_irqs;
+ } else {
+ cfg->irptndx = cfg->cbndx;
+ }
+
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
+ cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
+ else
+ cfg->asid = cfg->cbndx + smmu->cavium_id_base;
+ }
pgtbl_cfg = (struct io_pgtable_cfg) {
.pgsize_bitmap = smmu->pgsize_bitmap,
@@ -987,16 +1008,26 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
goto out_clear_smmu;
}
+ /* Cache the TTBR0 for the aux domain */
+ smmu_domain->ttbr0 = pgtbl_cfg.arm_lpae_s1_cfg.ttbr[0];
+
/* Update the domain's page sizes to reflect the page table format */
domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
domain->geometry.aperture_end = (1UL << ias) - 1;
domain->geometry.force_aperture = true;
+ pgtbl_ops[1] = NULL;
+
+ /*
+ * aux domains don't use split tables or program the hardware so we're
+ * done setting it up
+ */
+ if (smmu_domain->is_aux)
+ goto end;
+
/* Initialise the context bank with our page table cfg */
arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
- pgtbl_ops[1] = NULL;
-
if (split_tables) {
/* FIXME: I think it is safe to reuse pgtbl_cfg here */
pgtbl_ops[1] = alloc_io_pgtable_ops(fmt, &pgtbl_cfg,
@@ -1018,13 +1049,15 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
*/
irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
- IRQF_SHARED, "arm-smmu-context-fault", domain);
+ IRQF_SHARED, "arm-smmu-context-fault",
+ domain);
if (ret < 0) {
dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
cfg->irptndx, irq);
cfg->irptndx = INVALID_IRPTNDX;
}
+end:
mutex_unlock(&smmu_domain->init_mutex);
/* Publish page table ops for map/unmap */
@@ -1050,6 +1083,12 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
return;
+ /* All we need to do for aux devices is destroy the pagetable */
+ if (smmu_domain->is_aux) {
+ free_io_pgtable_ops(smmu_domain->pgtbl_ops[0]);
+ return;
+ }
+
ret = arm_smmu_rpm_get(smmu);
if (ret < 0)
return;
@@ -1330,6 +1369,39 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
return 0;
}
+static bool arm_smmu_dev_has_feat(struct device *dev,
+ enum iommu_dev_features feat)
+{
+ /*
+ * FIXME: Should we do some hardware checking here, like to be sure this
+ * is a stage 1 and such?
+ */
+
+ /* Always allow aux domains */
+ if (feat == IOMMU_DEV_FEAT_AUX)
+ return true;
+
+ return false;
+}
+
+/* FIXME: Add stubs for dev_enable_feat and dev_disable_feat? */
+
+/* Set up a new aux domain and create a new pagetable with the same
+ * characteristics as the master
+ */
+static int arm_smmu_aux_attach_dev(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ smmu_domain->is_aux = true;
+
+ /* No power is needed because aux domain doesn't touch the hardware */
+ return arm_smmu_init_domain_context(domain, smmu);
+}
+
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
int ret;
@@ -1342,6 +1414,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
return -ENXIO;
}
+ /* FIXME: Reject unmanaged domains since those should be aux? */
+
/*
* FIXME: The arch/arm DMA API code tries to attach devices to its own
* domains between of_xlate() and add_device() - we have no way to cope
@@ -1388,7 +1462,13 @@ arm_smmu_get_pgtbl_ops(struct iommu_domain *domain, unsigned long iova)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
+ struct arm_smmu_cb *cb;
+
+ /* quick escape for domains that don't have split pagetables enabled */
+ if (!smmu_domain->pgtbl_ops[1])
+ return smmu_domain->pgtbl_ops[0];
+
+ cb = &smmu_domain->smmu->cbs[cfg->cbndx];
if (iova & cb->split_table_mask)
return smmu_domain->pgtbl_ops[1];
@@ -1700,6 +1780,11 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
!!(smmu_domain->attributes &
(1 << DOMAIN_ATTR_SPLIT_TABLES));
return 0;
+ case DOMAIN_ATTR_PTBASE:
+ if (!smmu_domain->is_aux)
+ return -ENODEV;
+ *((u64 *)data) = smmu_domain->ttbr0;
+ return 0;
default:
return -ENODEV;
}
@@ -1810,7 +1895,9 @@ static struct iommu_ops arm_smmu_ops = {
.capable = arm_smmu_capable,
.domain_alloc = arm_smmu_domain_alloc,
.domain_free = arm_smmu_domain_free,
+ .dev_has_feat = arm_smmu_dev_has_feat,
.attach_dev = arm_smmu_attach_dev,
+ .aux_attach_dev = arm_smmu_aux_attach_dev,
.map = arm_smmu_map,
.unmap = arm_smmu_unmap,
.flush_iotlb_all = arm_smmu_flush_iotlb_all,
--
2.7.4
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 46+ messages in thread* Re: [RFC PATCH v1 05/15] iommu/arm-smmu: Add auxiliary domain support for arm-smmuv2
2019-03-01 19:38 ` Jordan Crouse
@ 2019-03-04 12:19 ` Jean-Philippe Brucker
-1 siblings, 0 replies; 46+ messages in thread
From: Jean-Philippe Brucker @ 2019-03-04 12:19 UTC (permalink / raw)
To: Jordan Crouse, freedreno
Cc: linux-arm-msm, Will Deacon, linux-kernel, iommu, dianders,
hoegsberg, Robin Murphy, linux-arm-kernel
Hi Jordan,
On 01/03/2019 19:38, Jordan Crouse wrote:
> Support the new auxiliary domain API for arm-smmuv2 to initialize and
> support multiple pagetables for a SMMU device. Since the smmu-v2 hardware
> doesn't have any built in support for switching the pagetable base it is
> left as an exercise to the caller to actually use the pagetable; aux
> domains in the IOMMU driver are only preoccupied with creating and managing
> the pagetable memory.
>
> Following is a pseudo code example of how a domain can be created
>
> /* Check to see if aux domains are supported */
> if (iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX)) {
> domain = iommu_domain_alloc(...);
>
> if (iommu_aux_attach_device(domain, dev))
> return FAIL;
>
> /* Save the base address of the pagetable for use by the driver */
> iommu_domain_get_attr(domain, DOMAIN_ATTR_PTBASE, &ptbase);
> }
> After this 'domain' can be used like any other iommu domain to map and
> unmap iova addresses in the pagetable. The driver/hardware can be used
> to switch the pagetable according to its own specific implementation.
>
> Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
[...]
> +static bool arm_smmu_dev_has_feat(struct device *dev,
> + enum iommu_dev_features feat)
> +{
> + /*
> + * FIXME: Should we do some hardware checking here, like to be sure this
> + * is a stage 1 and such?
> + */
> +
> + /* Always allow aux domains */
> + if (feat == IOMMU_DEV_FEAT_AUX)
> + return true;
If possible, we should only return true when SMMU and GPU are able to
coordinate and switch contexts. Can the feature be identified through ID
reg or compatible string?
If we plug a PCIe card with PASID behind a SMMUv2 'classic', and its
driver attempts to enable AUXD support, then this should return false.
> +
> + return false;
> +}
> +
> +/* FIXME: Add stubs for dev_enable_feat and dev_disable_feat? */
Ideally yes. Although SMMUv2 support for aux domains will likely only be
used by the MSM driver, using the same model in all IOMMU drivers would
ease moving things to common code later.
> +
> +/* Set up a new aux domain and create a new pagetable with the same
> + * characteristics as the master
> + */
> +static int arm_smmu_aux_attach_dev(struct iommu_domain *domain,
> + struct device *dev)
> +{
> + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> + struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
> + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +
> + smmu_domain->is_aux = true;
The API allows attaching the same domain to one device using
aux_attach_dev() and another using attach_dev(). For SMMUv3 we'll reject
this, since normal and aux domain are different things (one has PASID
tables, the other doesn't). Is this supported by SMMUv2? Otherwise some
sanity check here might be necessary.
> +
> + /* No power is needed because aux domain doesn't touch the hardware */
> + return arm_smmu_init_domain_context(domain, smmu);
> +}
> +
> static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> {
> int ret;
> @@ -1342,6 +1414,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> return -ENXIO;
> }
>
> + /* FIXME: Reject unmanaged domains since those should be aux? */
No, unmanaged domains are also used by VFIO and a couple of other drivers
that want to set up IOMMU mappings themselves.
Thanks,
Jean
> +
> /*
> * FIXME: The arch/arm DMA API code tries to attach devices to its own
> * domains between of_xlate() and add_device() - we have no way to cope
> @@ -1388,7 +1462,13 @@ arm_smmu_get_pgtbl_ops(struct iommu_domain *domain, unsigned long iova)
> {
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> - struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
> + struct arm_smmu_cb *cb;
> +
> + /* quick escape for domains that don't have split pagetables enabled */
> + if (!smmu_domain->pgtbl_ops[1])
> + return smmu_domain->pgtbl_ops[0];
> +
> + cb = &smmu_domain->smmu->cbs[cfg->cbndx];
>
> if (iova & cb->split_table_mask)
> return smmu_domain->pgtbl_ops[1];
> @@ -1700,6 +1780,11 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
> !!(smmu_domain->attributes &
> (1 << DOMAIN_ATTR_SPLIT_TABLES));
> return 0;
> + case DOMAIN_ATTR_PTBASE:
> + if (!smmu_domain->is_aux)
> + return -ENODEV;
> + *((u64 *)data) = smmu_domain->ttbr0;
> + return 0;
> default:
> return -ENODEV;
> }
> @@ -1810,7 +1895,9 @@ static struct iommu_ops arm_smmu_ops = {
> .capable = arm_smmu_capable,
> .domain_alloc = arm_smmu_domain_alloc,
> .domain_free = arm_smmu_domain_free,
> + .dev_has_feat = arm_smmu_dev_has_feat,
> .attach_dev = arm_smmu_attach_dev,
> + .aux_attach_dev = arm_smmu_aux_attach_dev,
> .map = arm_smmu_map,
> .unmap = arm_smmu_unmap,
> .flush_iotlb_all = arm_smmu_flush_iotlb_all,
>
^ permalink raw reply [flat|nested] 46+ messages in thread* Re: [RFC PATCH v1 05/15] iommu/arm-smmu: Add auxiliary domain support for arm-smmuv2
@ 2019-03-04 12:19 ` Jean-Philippe Brucker
0 siblings, 0 replies; 46+ messages in thread
From: Jean-Philippe Brucker @ 2019-03-04 12:19 UTC (permalink / raw)
To: Jordan Crouse, freedreno
Cc: linux-arm-msm, Will Deacon, linux-kernel, iommu, dianders,
hoegsberg, Robin Murphy, linux-arm-kernel
Hi Jordan,
On 01/03/2019 19:38, Jordan Crouse wrote:
> Support the new auxiliary domain API for arm-smmuv2 to initialize and
> support multiple pagetables for a SMMU device. Since the smmu-v2 hardware
> doesn't have any built in support for switching the pagetable base it is
> left as an exercise to the caller to actually use the pagetable; aux
> domains in the IOMMU driver are only preoccupied with creating and managing
> the pagetable memory.
>
> Following is a pseudo code example of how a domain can be created
>
> /* Check to see if aux domains are supported */
> if (iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX)) {
> domain = iommu_domain_alloc(...);
>
> if (iommu_aux_attach_device(domain, dev))
> return FAIL;
>
> /* Save the base address of the pagetable for use by the driver */
> iommu_domain_get_attr(domain, DOMAIN_ATTR_PTBASE, &ptbase);
> }
> After this 'domain' can be used like any other iommu domain to map and
> unmap iova addresses in the pagetable. The driver/hardware can be used
> to switch the pagetable according to its own specific implementation.
>
> Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
[...]
> +static bool arm_smmu_dev_has_feat(struct device *dev,
> + enum iommu_dev_features feat)
> +{
> + /*
> + * FIXME: Should we do some hardware checking here, like to be sure this
> + * is a stage 1 and such?
> + */
> +
> + /* Always allow aux domains */
> + if (feat == IOMMU_DEV_FEAT_AUX)
> + return true;
If possible, we should only return true when SMMU and GPU are able to
coordinate and switch contexts. Can the feature be identified through ID
reg or compatible string?
If we plug a PCIe card with PASID behind a SMMUv2 'classic', and its
driver attempts to enable AUXD support, then this should return false.
> +
> + return false;
> +}
> +
> +/* FIXME: Add stubs for dev_enable_feat and dev_disable_feat? */
Ideally yes. Although SMMUv2 support for aux domains will likely only be
used by the MSM driver, using the same model in all IOMMU drivers would
ease moving things to common code later.
> +
> +/* Set up a new aux domain and create a new pagetable with the same
> + * characteristics as the master
> + */
> +static int arm_smmu_aux_attach_dev(struct iommu_domain *domain,
> + struct device *dev)
> +{
> + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> + struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
> + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +
> + smmu_domain->is_aux = true;
The API allows attaching the same domain to one device using
aux_attach_dev() and another using attach_dev(). For SMMUv3 we'll reject
this, since normal and aux domain are different things (one has PASID
tables, the other doesn't). Is this supported by SMMUv2? Otherwise some
sanity check here might be necessary.
> +
> + /* No power is needed because aux domain doesn't touch the hardware */
> + return arm_smmu_init_domain_context(domain, smmu);
> +}
> +
> static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> {
> int ret;
> @@ -1342,6 +1414,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> return -ENXIO;
> }
>
> + /* FIXME: Reject unmanaged domains since those should be aux? */
No, unmanaged domains are also used by VFIO and a couple of other drivers
that want to set up IOMMU mappings themselves.
Thanks,
Jean
> +
> /*
> * FIXME: The arch/arm DMA API code tries to attach devices to its own
> * domains between of_xlate() and add_device() - we have no way to cope
> @@ -1388,7 +1462,13 @@ arm_smmu_get_pgtbl_ops(struct iommu_domain *domain, unsigned long iova)
> {
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> - struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
> + struct arm_smmu_cb *cb;
> +
> + /* quick escape for domains that don't have split pagetables enabled */
> + if (!smmu_domain->pgtbl_ops[1])
> + return smmu_domain->pgtbl_ops[0];
> +
> + cb = &smmu_domain->smmu->cbs[cfg->cbndx];
>
> if (iova & cb->split_table_mask)
> return smmu_domain->pgtbl_ops[1];
> @@ -1700,6 +1780,11 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
> !!(smmu_domain->attributes &
> (1 << DOMAIN_ATTR_SPLIT_TABLES));
> return 0;
> + case DOMAIN_ATTR_PTBASE:
> + if (!smmu_domain->is_aux)
> + return -ENODEV;
> + *((u64 *)data) = smmu_domain->ttbr0;
> + return 0;
> default:
> return -ENODEV;
> }
> @@ -1810,7 +1895,9 @@ static struct iommu_ops arm_smmu_ops = {
> .capable = arm_smmu_capable,
> .domain_alloc = arm_smmu_domain_alloc,
> .domain_free = arm_smmu_domain_free,
> + .dev_has_feat = arm_smmu_dev_has_feat,
> .attach_dev = arm_smmu_attach_dev,
> + .aux_attach_dev = arm_smmu_aux_attach_dev,
> .map = arm_smmu_map,
> .unmap = arm_smmu_unmap,
> .flush_iotlb_all = arm_smmu_flush_iotlb_all,
>
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 46+ messages in thread
* [RFC PATCH v1 06/15] drm/msm/adreno: Enable 64 bit mode by default on a5xx and a6xx targets
2019-03-01 19:38 ` Jordan Crouse
@ 2019-03-01 19:38 ` Jordan Crouse
-1 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Kees Cook, jean-philippe.brucker-5wv7dgnIgG8,
linux-arm-msm-u79uwXL29TY76Z2rM5mHXA, Sharat Masetty,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, David Airlie,
Rob Clark, dianders-uWgjrcJnOmJ4cg9Nei1l7Q,
hoegsberg-hpIqsD4AKlfQT0dZR+AlfA, Mamta Shukla, Thomas Zimmermann,
Daniel Vetter, Sean Paul, baolu.lu-VuQAYsv1563Yd54FQh9/CA
A5XX and newer GPUs can be run in either 32 or 64 bit mode. The GPU
registers and the microcode use 64 bit virtual addressing in either
case but the upper 32 bits are ignored if the GPU is in 32 bit mode.
There is no performance disadvantage to remaining in 64 bit mode even
if we are only generating 32 bit addresses so switch over now to prepare
for using addresses above 4G for targets that support them.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 14 ++++++++++++++
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 14 ++++++++++++++
2 files changed, 28 insertions(+)
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index d5f5e56..45662d3 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -749,6 +749,20 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
+ /* Put the GPU into 64 bit by default */
+ gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
+
ret = adreno_hw_init(gpu);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index fefe773..1c20d59 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -375,6 +375,20 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
+ /* Turn on 64 bit addressing for all blocks */
+ gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
+
/* enable hardware clockgating */
a6xx_set_hwcg(gpu, true);
--
2.7.4
_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [RFC PATCH v1 06/15] drm/msm/adreno: Enable 64 bit mode by default on a5xx and a6xx targets
@ 2019-03-01 19:38 ` Jordan Crouse
0 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno
Cc: jean-philippe.brucker, linux-arm-msm, dianders, hoegsberg,
baolu.lu, Sean Paul, Kees Cook, Thomas Zimmermann, Sharat Masetty,
dri-devel, linux-kernel, Rob Clark, David Airlie, Mamta Shukla,
Daniel Vetter
A5XX and newer GPUs can be run in either 32 or 64 bit mode. The GPU
registers and the microcode use 64 bit virtual addressing in either
case but the upper 32 bits are ignored if the GPU is in 32 bit mode.
There is no performance disadvantage to remaining in 64 bit mode even
if we are only generating 32 bit addresses so switch over now to prepare
for using addresses above 4G for targets that support them.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 14 ++++++++++++++
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 14 ++++++++++++++
2 files changed, 28 insertions(+)
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index d5f5e56..45662d3 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -749,6 +749,20 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
+ /* Put the GPU into 64 bit by default */
+ gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
+
ret = adreno_hw_init(gpu);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index fefe773..1c20d59 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -375,6 +375,20 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
+ /* Turn on 64 bit addressing for all blocks */
+ gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
+
/* enable hardware clockgating */
a6xx_set_hwcg(gpu, true);
--
2.7.4
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [RFC PATCH v1 08/15] drm/msm: Pass the MMU domain index in struct msm_file_private
2019-03-01 19:38 ` Jordan Crouse
@ 2019-03-01 19:38 ` Jordan Crouse
-1 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: jean-philippe.brucker-5wv7dgnIgG8,
linux-arm-msm-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, David Airlie,
Rob Clark, dianders-uWgjrcJnOmJ4cg9Nei1l7Q,
hoegsberg-hpIqsD4AKlfQT0dZR+AlfA, Daniel Vetter, Sean Paul,
baolu.lu-VuQAYsv1563Yd54FQh9/CA
Pass the GPU address space in struct msm_file_private instead of
assuming gpu->aspace throughout the submit path. This clears the way
to change ctx->aspace to a per-instance pagetable.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/gpu/drm/msm/msm_drv.c | 2 ++
drivers/gpu/drm/msm/msm_drv.h | 1 +
drivers/gpu/drm/msm/msm_gem.h | 1 +
drivers/gpu/drm/msm/msm_gem_submit.c | 13 ++++++++-----
drivers/gpu/drm/msm/msm_gpu.c | 5 ++---
5 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 906b2bb..cac2293 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -607,6 +607,7 @@ static void load_gpu(struct drm_device *dev)
static int context_init(struct drm_device *dev, struct drm_file *file)
{
+ struct msm_drm_private *priv = dev->dev_private;
struct msm_file_private *ctx;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -615,6 +616,7 @@ static int context_init(struct drm_device *dev, struct drm_file *file)
msm_submitqueue_init(dev, ctx);
+ ctx->aspace = priv->gpu->aspace;
file->driver_priv = ctx;
return 0;
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index cb642fe..3140e8f 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -68,6 +68,7 @@ struct msm_file_private {
rwlock_t queuelock;
struct list_head submitqueues;
int queueid;
+ struct msm_gem_address_space *aspace;
};
enum msm_mdp_plane_property {
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 2064fac..5e21d01 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -142,6 +142,7 @@ void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass);
struct msm_gem_submit {
struct drm_device *dev;
struct msm_gpu *gpu;
+ struct msm_gem_address_space *aspace;
struct list_head node; /* node in ring submit list */
struct list_head bo_list;
struct ww_acquire_ctx ticket;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 12b983f..79d95b2 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -32,8 +32,9 @@
#define BO_PINNED 0x2000
static struct msm_gem_submit *submit_create(struct drm_device *dev,
- struct msm_gpu *gpu, struct msm_gpu_submitqueue *queue,
- uint32_t nr_bos, uint32_t nr_cmds)
+ struct msm_gpu *gpu, struct msm_gem_address_space *aspace,
+ struct msm_gpu_submitqueue *queue, uint32_t nr_bos,
+ uint32_t nr_cmds)
{
struct msm_gem_submit *submit;
uint64_t sz = sizeof(*submit) + ((u64)nr_bos * sizeof(submit->bos[0])) +
@@ -47,6 +48,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
return NULL;
submit->dev = dev;
+ submit->aspace = aspace;
submit->gpu = gpu;
submit->fence = NULL;
submit->cmd = (void *)&submit->bos[nr_bos];
@@ -170,7 +172,7 @@ static void submit_unlock_unpin_bo(struct msm_gem_submit *submit,
struct msm_gem_object *msm_obj = submit->bos[i].obj;
if (submit->bos[i].flags & BO_PINNED)
- msm_gem_unpin_iova(&msm_obj->base, submit->gpu->aspace);
+ msm_gem_unpin_iova(&msm_obj->base, submit->aspace);
if (submit->bos[i].flags & BO_LOCKED)
ww_mutex_unlock(&msm_obj->resv->lock);
@@ -274,7 +276,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
/* if locking succeeded, pin bo: */
ret = msm_gem_get_and_pin_iova(&msm_obj->base,
- submit->gpu->aspace, &iova);
+ submit->aspace, &iova);
if (ret)
break;
@@ -487,7 +489,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
}
}
- submit = submit_create(dev, gpu, queue, args->nr_bos, args->nr_cmds);
+ submit = submit_create(dev, gpu, ctx->aspace, queue, args->nr_bos,
+ args->nr_cmds);
if (!submit) {
ret = -ENOMEM;
goto out_unlock;
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 10babd1..79b71b1 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -693,7 +693,7 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
struct msm_gem_object *msm_obj = submit->bos[i].obj;
/* move to inactive: */
msm_gem_move_to_inactive(&msm_obj->base);
- msm_gem_unpin_iova(&msm_obj->base, gpu->aspace);
+ msm_gem_unpin_iova(&msm_obj->base, submit->aspace);
drm_gem_object_put(&msm_obj->base);
}
@@ -777,8 +777,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
/* submit takes a reference to the bo and iova until retired: */
drm_gem_object_get(&msm_obj->base);
- msm_gem_get_and_pin_iova(&msm_obj->base,
- submit->gpu->aspace, &iova);
+ msm_gem_get_and_pin_iova(&msm_obj->base, submit->aspace, &iova);
if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
--
2.7.4
_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno
^ permalink raw reply related [flat|nested] 46+ messages in thread* [RFC PATCH v1 08/15] drm/msm: Pass the MMU domain index in struct msm_file_private
@ 2019-03-01 19:38 ` Jordan Crouse
0 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno
Cc: jean-philippe.brucker, linux-arm-msm, dianders, hoegsberg,
baolu.lu, Sean Paul, linux-kernel, dri-devel, Rob Clark,
David Airlie, Daniel Vetter
Pass the GPU address space in struct msm_file_private instead of
assuming gpu->aspace throughout the submit path. This clears the way
to change ctx->aspace to a per-instance pagetable.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/gpu/drm/msm/msm_drv.c | 2 ++
drivers/gpu/drm/msm/msm_drv.h | 1 +
drivers/gpu/drm/msm/msm_gem.h | 1 +
drivers/gpu/drm/msm/msm_gem_submit.c | 13 ++++++++-----
drivers/gpu/drm/msm/msm_gpu.c | 5 ++---
5 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 906b2bb..cac2293 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -607,6 +607,7 @@ static void load_gpu(struct drm_device *dev)
static int context_init(struct drm_device *dev, struct drm_file *file)
{
+ struct msm_drm_private *priv = dev->dev_private;
struct msm_file_private *ctx;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -615,6 +616,7 @@ static int context_init(struct drm_device *dev, struct drm_file *file)
msm_submitqueue_init(dev, ctx);
+ ctx->aspace = priv->gpu->aspace;
file->driver_priv = ctx;
return 0;
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index cb642fe..3140e8f 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -68,6 +68,7 @@ struct msm_file_private {
rwlock_t queuelock;
struct list_head submitqueues;
int queueid;
+ struct msm_gem_address_space *aspace;
};
enum msm_mdp_plane_property {
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 2064fac..5e21d01 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -142,6 +142,7 @@ void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass);
struct msm_gem_submit {
struct drm_device *dev;
struct msm_gpu *gpu;
+ struct msm_gem_address_space *aspace;
struct list_head node; /* node in ring submit list */
struct list_head bo_list;
struct ww_acquire_ctx ticket;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 12b983f..79d95b2 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -32,8 +32,9 @@
#define BO_PINNED 0x2000
static struct msm_gem_submit *submit_create(struct drm_device *dev,
- struct msm_gpu *gpu, struct msm_gpu_submitqueue *queue,
- uint32_t nr_bos, uint32_t nr_cmds)
+ struct msm_gpu *gpu, struct msm_gem_address_space *aspace,
+ struct msm_gpu_submitqueue *queue, uint32_t nr_bos,
+ uint32_t nr_cmds)
{
struct msm_gem_submit *submit;
uint64_t sz = sizeof(*submit) + ((u64)nr_bos * sizeof(submit->bos[0])) +
@@ -47,6 +48,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
return NULL;
submit->dev = dev;
+ submit->aspace = aspace;
submit->gpu = gpu;
submit->fence = NULL;
submit->cmd = (void *)&submit->bos[nr_bos];
@@ -170,7 +172,7 @@ static void submit_unlock_unpin_bo(struct msm_gem_submit *submit,
struct msm_gem_object *msm_obj = submit->bos[i].obj;
if (submit->bos[i].flags & BO_PINNED)
- msm_gem_unpin_iova(&msm_obj->base, submit->gpu->aspace);
+ msm_gem_unpin_iova(&msm_obj->base, submit->aspace);
if (submit->bos[i].flags & BO_LOCKED)
ww_mutex_unlock(&msm_obj->resv->lock);
@@ -274,7 +276,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
/* if locking succeeded, pin bo: */
ret = msm_gem_get_and_pin_iova(&msm_obj->base,
- submit->gpu->aspace, &iova);
+ submit->aspace, &iova);
if (ret)
break;
@@ -487,7 +489,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
}
}
- submit = submit_create(dev, gpu, queue, args->nr_bos, args->nr_cmds);
+ submit = submit_create(dev, gpu, ctx->aspace, queue, args->nr_bos,
+ args->nr_cmds);
if (!submit) {
ret = -ENOMEM;
goto out_unlock;
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 10babd1..79b71b1 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -693,7 +693,7 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
struct msm_gem_object *msm_obj = submit->bos[i].obj;
/* move to inactive: */
msm_gem_move_to_inactive(&msm_obj->base);
- msm_gem_unpin_iova(&msm_obj->base, gpu->aspace);
+ msm_gem_unpin_iova(&msm_obj->base, submit->aspace);
drm_gem_object_put(&msm_obj->base);
}
@@ -777,8 +777,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
/* submit takes a reference to the bo and iova until retired: */
drm_gem_object_get(&msm_obj->base);
- msm_gem_get_and_pin_iova(&msm_obj->base,
- submit->gpu->aspace, &iova);
+ msm_gem_get_and_pin_iova(&msm_obj->base, submit->aspace, &iova);
if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
--
2.7.4
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [RFC PATCH v1 09/15] drm/msm/gpu: Move address space setup to the GPU targets
2019-03-01 19:38 ` Jordan Crouse
@ 2019-03-01 19:38 ` Jordan Crouse
-1 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Kees Cook, Jonathan Marek, jean-philippe.brucker-5wv7dgnIgG8,
linux-arm-msm-u79uwXL29TY76Z2rM5mHXA, Sharat Masetty,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Bjorn Andersson,
David Airlie, Rob Clark, dianders-uWgjrcJnOmJ4cg9Nei1l7Q,
hoegsberg-hpIqsD4AKlfQT0dZR+AlfA, Mamta Shukla, Thomas Zimmermann,
Daniel Vetter, Sean Paul, baolu.lu-VuQAYsv1563Yd54FQh9/CA
Move the address space setup code out of the generic msm GPU code
to the individual GPU targets. This allows us to do target specific
setup such as gpummu for a2xx or split pagetables and per-instance
pagetables for newer a5xx and a6xx targets. All this is at the
expense of duplicated code in some of the target files but I think
it pays for itself in improved code flow and flexibility.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/gpu/drm/msm/adreno/a2xx_gpu.c | 37 ++++++++++++++++------
drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 50 ++++++++++++++++++++++--------
drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 51 +++++++++++++++++++++++--------
drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 37 +++++++++++++++++++---
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 37 +++++++++++++++++++---
drivers/gpu/drm/msm/adreno/adreno_gpu.c | 7 -----
drivers/gpu/drm/msm/msm_gem.h | 1 +
drivers/gpu/drm/msm/msm_gpu.c | 54 ++-------------------------------
drivers/gpu/drm/msm/msm_gpu.h | 2 ++
9 files changed, 173 insertions(+), 103 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
index 1f83bc1..49241d0 100644
--- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
@@ -401,6 +401,30 @@ static struct msm_gpu_state *a2xx_gpu_state_get(struct msm_gpu *gpu)
return state;
}
+static struct msm_gem_address_space *
+a2xx_create_address_space(struct msm_gpu *gpu)
+{
+ struct msm_gem_address_space *aspace;
+ int ret;
+
+ aspace = msm_gem_address_space_create_a2xx(&gpu->pdev->dev, gpu,
+ "gpu", SZ_16M, SZ_16M + 0xff * SZ_64K);
+ if (IS_ERR(aspace)) {
+ DRM_DEV_ERROR(gpu->dev->dev,
+ "No memory protection without MMU\n");
+ return ERR_PTR(-ENXIO);
+ }
+
+ ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
+ if (ret) {
+ msm_gem_address_space_put(aspace);
+ return ERR_PTR(ret);
+ }
+
+ return aspace;
+}
+
+
/* Register offset defines for A2XX - copy of A3XX */
static const unsigned int a2xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
@@ -429,6 +453,7 @@ static const struct adreno_gpu_funcs funcs = {
#endif
.gpu_state_get = a2xx_gpu_state_get,
.gpu_state_put = adreno_gpu_state_put,
+ .create_address_space = a2xx_create_address_space,
},
};
@@ -473,16 +498,8 @@ struct msm_gpu *a2xx_gpu_init(struct drm_device *dev)
adreno_gpu->reg_offsets = a2xx_register_offsets;
ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
- if (ret)
- goto fail;
-
- if (!gpu->aspace) {
- dev_err(dev->dev, "No memory protection without MMU\n");
- ret = -ENXIO;
- goto fail;
- }
-
- return gpu;
+ if (!ret)
+ return gpu;
fail:
if (a2xx_gpu)
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index c3b4bc6..33ab5e8 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -21,6 +21,7 @@
# include <mach/ocmem.h>
#endif
+#include "msm_gem.h"
#include "a3xx_gpu.h"
#define A3XX_INT0_MASK \
@@ -433,6 +434,41 @@ static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
return state;
}
+static struct msm_gem_address_space *
+a3xx_create_address_space(struct msm_gpu *gpu)
+{
+ struct msm_gem_address_space *aspace;
+ struct iommu_domain *iommu;
+ int ret;
+
+ iommu = iommu_domain_alloc(&platform_bus_type);
+ if (!iommu) {
+ DRM_DEV_ERROR(gpu->dev->dev,
+ "No memory protection without IOMMU\n");
+ return ERR_PTR(-ENXIO);
+ }
+
+ iommu->geometry.aperture_start = SZ_16M;
+ iommu->geometry.aperture_end = 0xffffffff;
+
+ aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
+ if (IS_ERR(aspace)) {
+ iommu_domain_free(iommu);
+ DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
+ PTR_ERR(aspace));
+ return aspace;
+ }
+
+ ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
+ if (ret) {
+ msm_gem_address_space_put(aspace);
+ return ERR_PTR(ret);
+ }
+
+ return aspace;
+}
+
+
/* Register offset defines for A3XX */
static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
@@ -461,6 +497,7 @@ static const struct adreno_gpu_funcs funcs = {
#endif
.gpu_state_get = a3xx_gpu_state_get,
.gpu_state_put = adreno_gpu_state_put,
+ .create_address_space = a3xx_create_address_space,
},
};
@@ -520,19 +557,6 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
#endif
}
- if (!gpu->aspace) {
- /* TODO we think it is possible to configure the GPU to
- * restrict access to VRAM carveout. But the required
- * registers are unknown. For now just bail out and
- * limp along with just modesetting. If it turns out
- * to not be possible to restrict access, then we must
- * implement a cmdstream validator.
- */
- DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
- ret = -ENXIO;
- goto fail;
- }
-
return gpu;
fail:
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index 18f9a8e..08a5729 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -15,6 +15,8 @@
# include <soc/qcom/ocmem.h>
#endif
+#include "msm_gem.h"
+
#define A4XX_INT0_MASK \
(A4XX_INT0_RBBM_AHB_ERROR | \
A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
@@ -530,6 +532,41 @@ static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
return 0;
}
+static struct msm_gem_address_space *
+a4xx_create_address_space(struct msm_gpu *gpu)
+{
+ struct msm_gem_address_space *aspace;
+ struct iommu_domain *iommu;
+ int ret;
+
+ iommu = iommu_domain_alloc(&platform_bus_type);
+ if (!iommu) {
+ DRM_DEV_ERROR(gpu->dev->dev,
+ "No memory protection without IOMMU\n");
+ return ERR_PTR(-ENXIO);
+ }
+
+ iommu->geometry.aperture_start = SZ_16M;
+ iommu->geometry.aperture_end = 0xffffffff;
+
+ aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
+ if (IS_ERR(aspace)) {
+ iommu_domain_free(iommu);
+ DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
+ PTR_ERR(aspace));
+ return aspace;
+ }
+
+ ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
+ if (ret) {
+ msm_gem_address_space_put(aspace);
+ return ERR_PTR(ret);
+ }
+
+ return aspace;
+}
+
+
static const struct adreno_gpu_funcs funcs = {
.base = {
.get_param = adreno_get_param,
@@ -547,6 +584,7 @@ static const struct adreno_gpu_funcs funcs = {
#endif
.gpu_state_get = a4xx_gpu_state_get,
.gpu_state_put = adreno_gpu_state_put,
+ .create_address_space = a4xx_create_address_space,
},
.get_timestamp = a4xx_get_timestamp,
};
@@ -600,19 +638,6 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
#endif
}
- if (!gpu->aspace) {
- /* TODO we think it is possible to configure the GPU to
- * restrict access to VRAM carveout. But the required
- * registers are unknown. For now just bail out and
- * limp along with just modesetting. If it turns out
- * to not be possible to restrict access, then we must
- * implement a cmdstream validator.
- */
- DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
- ret = -ENXIO;
- goto fail;
- }
-
return gpu;
fail:
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 45662d3..3d6f414 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1456,6 +1456,38 @@ static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
return (unsigned long)busy_time;
}
+static struct msm_gem_address_space *
+a5xx_create_address_space(struct msm_gpu *gpu)
+{
+ struct msm_gem_address_space *aspace;
+ struct iommu_domain *iommu;
+ int ret;
+
+ iommu = iommu_domain_alloc(&platform_bus_type);
+ if (!iommu)
+ return NULL;
+
+ iommu->geometry.aperture_start = 0x100000000ULL;
+ iommu->geometry.aperture_end = 0x1ffffffffULL;
+
+ aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
+ if (IS_ERR(aspace)) {
+ iommu_domain_free(iommu);
+ DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
+ PTR_ERR(aspace));
+ return aspace;
+ }
+
+ ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
+ if (ret) {
+ msm_gem_address_space_put(aspace);
+ return ERR_PTR(ret);
+ }
+
+ msm_mmu_set_fault_handler(aspace->mmu, gpu, a5xx_fault_handler);
+ return aspace;
+}
+
static const struct adreno_gpu_funcs funcs = {
.base = {
.get_param = adreno_get_param,
@@ -1477,6 +1509,7 @@ static const struct adreno_gpu_funcs funcs = {
.gpu_busy = a5xx_gpu_busy,
.gpu_state_get = a5xx_gpu_state_get,
.gpu_state_put = a5xx_gpu_state_put,
+ .create_address_space = a5xx_create_address_space,
},
.get_timestamp = a5xx_get_timestamp,
};
@@ -1523,7 +1556,6 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
adreno_gpu->registers = a5xx_registers;
adreno_gpu->reg_offsets = a5xx_register_offsets;
-
a5xx_gpu->lm_leakage = 0x4E001A;
check_speed_bin(&pdev->dev);
@@ -1534,9 +1566,6 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
return ERR_PTR(ret);
}
- if (gpu->aspace)
- msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
-
/* Set up the preemption specific bits and pieces for each ringbuffer */
a5xx_preempt_init(gpu);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 1c20d59..f2e0800 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -783,6 +783,38 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
return (unsigned long)busy_time;
}
+static struct msm_gem_address_space *
+a6xx_create_address_space(struct msm_gpu *gpu)
+{
+ struct msm_gem_address_space *aspace;
+ struct iommu_domain *iommu;
+ int ret;
+
+ iommu = iommu_domain_alloc(&platform_bus_type);
+ if (!iommu)
+ return NULL;
+
+ iommu->geometry.aperture_start = 0x100000000ULL;
+ iommu->geometry.aperture_end = 0x1ffffffffULL;
+
+ aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
+ if (IS_ERR(aspace)) {
+ iommu_domain_free(iommu);
+ DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
+ PTR_ERR(aspace));
+ return aspace;
+ }
+
+ ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
+ if (ret) {
+ msm_gem_address_space_put(aspace);
+ return ERR_PTR(ret);
+ }
+
+ msm_mmu_set_fault_handler(aspace->mmu, gpu, a6xx_fault_handler);
+ return aspace;
+}
+
static const struct adreno_gpu_funcs funcs = {
.base = {
.get_param = adreno_get_param,
@@ -803,6 +835,7 @@ static const struct adreno_gpu_funcs funcs = {
.gpu_set_freq = a6xx_gmu_set_freq,
.gpu_state_get = a6xx_gpu_state_get,
.gpu_state_put = a6xx_gpu_state_put,
+ .create_address_space = a6xx_create_address_space,
},
.get_timestamp = a6xx_get_timestamp,
};
@@ -845,9 +878,5 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
return ERR_PTR(ret);
}
- if (gpu->aspace)
- msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
- a6xx_fault_handler);
-
return gpu;
}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 2cfee1a..dc9ea82 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -765,13 +765,6 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
adreno_gpu->rev = config->rev;
adreno_gpu_config.ioname = "kgsl_3d0_reg_memory";
-
- adreno_gpu_config.va_start = SZ_16M;
- adreno_gpu_config.va_end = 0xffffffff;
- /* maximum range of a2xx mmu */
- if (adreno_is_a2xx(adreno_gpu))
- adreno_gpu_config.va_end = SZ_16M + 0xfff * SZ_64K;
-
adreno_gpu_config.nr_rings = nr_rings;
adreno_get_pwrlevels(&pdev->dev, gpu);
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 5e21d01..777f5fb 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -21,6 +21,7 @@
#include <linux/kref.h>
#include <linux/reservation.h>
#include "msm_drv.h"
+#include "msm_mmu.h"
/* Additional internal-use only BO flags: */
#define MSM_BO_STOLEN 0x10000000 /* try to use stolen/splash memory */
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 79b71b1..ec48bb3 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -20,7 +20,6 @@
#include "msm_mmu.h"
#include "msm_fence.h"
#include "msm_gpu_trace.h"
-#include "adreno/adreno_gpu.h"
#include <generated/utsrelease.h>
#include <linux/string_helpers.h>
@@ -821,51 +820,6 @@ static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
return 0;
}
-static struct msm_gem_address_space *
-msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev,
- uint64_t va_start, uint64_t va_end)
-{
- struct msm_gem_address_space *aspace;
- int ret;
-
- /*
- * Setup IOMMU.. eventually we will (I think) do this once per context
- * and have separate page tables per context. For now, to keep things
- * simple and to get something working, just use a single address space:
- */
- if (!adreno_is_a2xx(to_adreno_gpu(gpu))) {
- struct iommu_domain *iommu = iommu_domain_alloc(&platform_bus_type);
- if (!iommu)
- return NULL;
-
- iommu->geometry.aperture_start = va_start;
- iommu->geometry.aperture_end = va_end;
-
- DRM_DEV_INFO(gpu->dev->dev, "%s: using IOMMU\n", gpu->name);
-
- aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu");
- if (IS_ERR(aspace))
- iommu_domain_free(iommu);
- } else {
- aspace = msm_gem_address_space_create_a2xx(&pdev->dev, gpu, "gpu",
- va_start, va_end);
- }
-
- if (IS_ERR(aspace)) {
- DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
- PTR_ERR(aspace));
- return ERR_CAST(aspace);
- }
-
- ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
- if (ret) {
- msm_gem_address_space_put(aspace);
- return ERR_PTR(ret);
- }
-
- return aspace;
-}
-
int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
const char *name, struct msm_gpu_config *config)
@@ -938,12 +892,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
msm_devfreq_init(gpu);
- gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
- config->va_start, config->va_end);
-
- if (gpu->aspace == NULL)
- DRM_DEV_INFO(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
- else if (IS_ERR(gpu->aspace)) {
+ gpu->aspace = gpu->funcs->create_address_space(gpu);
+ if (IS_ERR(gpu->aspace)) {
ret = PTR_ERR(gpu->aspace);
goto fail;
}
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index ca17086..81b9861 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -74,6 +74,8 @@ struct msm_gpu_funcs {
int (*gpu_state_put)(struct msm_gpu_state *state);
unsigned long (*gpu_get_freq)(struct msm_gpu *gpu);
void (*gpu_set_freq)(struct msm_gpu *gpu, unsigned long freq);
+ struct msm_gem_address_space *(*create_address_space)
+ (struct msm_gpu *gpu);
};
struct msm_gpu {
--
2.7.4
_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno
^ permalink raw reply related [flat|nested] 46+ messages in thread* [RFC PATCH v1 09/15] drm/msm/gpu: Move address space setup to the GPU targets
@ 2019-03-01 19:38 ` Jordan Crouse
0 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno
Cc: jean-philippe.brucker, linux-arm-msm, dianders, hoegsberg,
baolu.lu, Bjorn Andersson, Kees Cook, Thomas Zimmermann,
Sean Paul, Sharat Masetty, dri-devel, linux-kernel, Rob Clark,
David Airlie, Jonathan Marek, Mamta Shukla, Daniel Vetter
Move the address space setup code out of the generic msm GPU code
to the individual GPU targets. This allows us to do target specific
setup such as gpummu for a2xx or split pagetables and per-instance
pagetables for newer a5xx and a6xx targets. All this is at the
expense of duplicated code in some of the target files but I think
it pays for itself in improved code flow and flexibility.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/gpu/drm/msm/adreno/a2xx_gpu.c | 37 ++++++++++++++++------
drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 50 ++++++++++++++++++++++--------
drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 51 +++++++++++++++++++++++--------
drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 37 +++++++++++++++++++---
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 37 +++++++++++++++++++---
drivers/gpu/drm/msm/adreno/adreno_gpu.c | 7 -----
drivers/gpu/drm/msm/msm_gem.h | 1 +
drivers/gpu/drm/msm/msm_gpu.c | 54 ++-------------------------------
drivers/gpu/drm/msm/msm_gpu.h | 2 ++
9 files changed, 173 insertions(+), 103 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
index 1f83bc1..49241d0 100644
--- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
@@ -401,6 +401,30 @@ static struct msm_gpu_state *a2xx_gpu_state_get(struct msm_gpu *gpu)
return state;
}
+static struct msm_gem_address_space *
+a2xx_create_address_space(struct msm_gpu *gpu)
+{
+ struct msm_gem_address_space *aspace;
+ int ret;
+
+ aspace = msm_gem_address_space_create_a2xx(&gpu->pdev->dev, gpu,
+ "gpu", SZ_16M, SZ_16M + 0xff * SZ_64K);
+ if (IS_ERR(aspace)) {
+ DRM_DEV_ERROR(gpu->dev->dev,
+ "No memory protection without MMU\n");
+ return ERR_PTR(-ENXIO);
+ }
+
+ ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
+ if (ret) {
+ msm_gem_address_space_put(aspace);
+ return ERR_PTR(ret);
+ }
+
+ return aspace;
+}
+
+
/* Register offset defines for A2XX - copy of A3XX */
static const unsigned int a2xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
@@ -429,6 +453,7 @@ static const struct adreno_gpu_funcs funcs = {
#endif
.gpu_state_get = a2xx_gpu_state_get,
.gpu_state_put = adreno_gpu_state_put,
+ .create_address_space = a2xx_create_address_space,
},
};
@@ -473,16 +498,8 @@ struct msm_gpu *a2xx_gpu_init(struct drm_device *dev)
adreno_gpu->reg_offsets = a2xx_register_offsets;
ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
- if (ret)
- goto fail;
-
- if (!gpu->aspace) {
- dev_err(dev->dev, "No memory protection without MMU\n");
- ret = -ENXIO;
- goto fail;
- }
-
- return gpu;
+ if (!ret)
+ return gpu;
fail:
if (a2xx_gpu)
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index c3b4bc6..33ab5e8 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -21,6 +21,7 @@
# include <mach/ocmem.h>
#endif
+#include "msm_gem.h"
#include "a3xx_gpu.h"
#define A3XX_INT0_MASK \
@@ -433,6 +434,41 @@ static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
return state;
}
+static struct msm_gem_address_space *
+a3xx_create_address_space(struct msm_gpu *gpu)
+{
+ struct msm_gem_address_space *aspace;
+ struct iommu_domain *iommu;
+ int ret;
+
+ iommu = iommu_domain_alloc(&platform_bus_type);
+ if (!iommu) {
+ DRM_DEV_ERROR(gpu->dev->dev,
+ "No memory protection without IOMMU\n");
+ return ERR_PTR(-ENXIO);
+ }
+
+ iommu->geometry.aperture_start = SZ_16M;
+ iommu->geometry.aperture_end = 0xffffffff;
+
+ aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
+ if (IS_ERR(aspace)) {
+ iommu_domain_free(iommu);
+ DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
+ PTR_ERR(aspace));
+ return aspace;
+ }
+
+ ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
+ if (ret) {
+ msm_gem_address_space_put(aspace);
+ return ERR_PTR(ret);
+ }
+
+ return aspace;
+}
+
+
/* Register offset defines for A3XX */
static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
@@ -461,6 +497,7 @@ static const struct adreno_gpu_funcs funcs = {
#endif
.gpu_state_get = a3xx_gpu_state_get,
.gpu_state_put = adreno_gpu_state_put,
+ .create_address_space = a3xx_create_address_space,
},
};
@@ -520,19 +557,6 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
#endif
}
- if (!gpu->aspace) {
- /* TODO we think it is possible to configure the GPU to
- * restrict access to VRAM carveout. But the required
- * registers are unknown. For now just bail out and
- * limp along with just modesetting. If it turns out
- * to not be possible to restrict access, then we must
- * implement a cmdstream validator.
- */
- DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
- ret = -ENXIO;
- goto fail;
- }
-
return gpu;
fail:
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index 18f9a8e..08a5729 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -15,6 +15,8 @@
# include <soc/qcom/ocmem.h>
#endif
+#include "msm_gem.h"
+
#define A4XX_INT0_MASK \
(A4XX_INT0_RBBM_AHB_ERROR | \
A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
@@ -530,6 +532,41 @@ static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
return 0;
}
+static struct msm_gem_address_space *
+a4xx_create_address_space(struct msm_gpu *gpu)
+{
+ struct msm_gem_address_space *aspace;
+ struct iommu_domain *iommu;
+ int ret;
+
+ iommu = iommu_domain_alloc(&platform_bus_type);
+ if (!iommu) {
+ DRM_DEV_ERROR(gpu->dev->dev,
+ "No memory protection without IOMMU\n");
+ return ERR_PTR(-ENXIO);
+ }
+
+ iommu->geometry.aperture_start = SZ_16M;
+ iommu->geometry.aperture_end = 0xffffffff;
+
+ aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
+ if (IS_ERR(aspace)) {
+ iommu_domain_free(iommu);
+ DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
+ PTR_ERR(aspace));
+ return aspace;
+ }
+
+ ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
+ if (ret) {
+ msm_gem_address_space_put(aspace);
+ return ERR_PTR(ret);
+ }
+
+ return aspace;
+}
+
+
static const struct adreno_gpu_funcs funcs = {
.base = {
.get_param = adreno_get_param,
@@ -547,6 +584,7 @@ static const struct adreno_gpu_funcs funcs = {
#endif
.gpu_state_get = a4xx_gpu_state_get,
.gpu_state_put = adreno_gpu_state_put,
+ .create_address_space = a4xx_create_address_space,
},
.get_timestamp = a4xx_get_timestamp,
};
@@ -600,19 +638,6 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
#endif
}
- if (!gpu->aspace) {
- /* TODO we think it is possible to configure the GPU to
- * restrict access to VRAM carveout. But the required
- * registers are unknown. For now just bail out and
- * limp along with just modesetting. If it turns out
- * to not be possible to restrict access, then we must
- * implement a cmdstream validator.
- */
- DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
- ret = -ENXIO;
- goto fail;
- }
-
return gpu;
fail:
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 45662d3..3d6f414 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1456,6 +1456,38 @@ static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
return (unsigned long)busy_time;
}
+static struct msm_gem_address_space *
+a5xx_create_address_space(struct msm_gpu *gpu)
+{
+ struct msm_gem_address_space *aspace;
+ struct iommu_domain *iommu;
+ int ret;
+
+ iommu = iommu_domain_alloc(&platform_bus_type);
+ if (!iommu)
+ return NULL;
+
+ iommu->geometry.aperture_start = 0x100000000ULL;
+ iommu->geometry.aperture_end = 0x1ffffffffULL;
+
+ aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
+ if (IS_ERR(aspace)) {
+ iommu_domain_free(iommu);
+ DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
+ PTR_ERR(aspace));
+ return aspace;
+ }
+
+ ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
+ if (ret) {
+ msm_gem_address_space_put(aspace);
+ return ERR_PTR(ret);
+ }
+
+ msm_mmu_set_fault_handler(aspace->mmu, gpu, a5xx_fault_handler);
+ return aspace;
+}
+
static const struct adreno_gpu_funcs funcs = {
.base = {
.get_param = adreno_get_param,
@@ -1477,6 +1509,7 @@ static const struct adreno_gpu_funcs funcs = {
.gpu_busy = a5xx_gpu_busy,
.gpu_state_get = a5xx_gpu_state_get,
.gpu_state_put = a5xx_gpu_state_put,
+ .create_address_space = a5xx_create_address_space,
},
.get_timestamp = a5xx_get_timestamp,
};
@@ -1523,7 +1556,6 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
adreno_gpu->registers = a5xx_registers;
adreno_gpu->reg_offsets = a5xx_register_offsets;
-
a5xx_gpu->lm_leakage = 0x4E001A;
check_speed_bin(&pdev->dev);
@@ -1534,9 +1566,6 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
return ERR_PTR(ret);
}
- if (gpu->aspace)
- msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
-
/* Set up the preemption specific bits and pieces for each ringbuffer */
a5xx_preempt_init(gpu);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 1c20d59..f2e0800 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -783,6 +783,38 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
return (unsigned long)busy_time;
}
+static struct msm_gem_address_space *
+a6xx_create_address_space(struct msm_gpu *gpu)
+{
+ struct msm_gem_address_space *aspace;
+ struct iommu_domain *iommu;
+ int ret;
+
+ iommu = iommu_domain_alloc(&platform_bus_type);
+ if (!iommu)
+ return NULL;
+
+ iommu->geometry.aperture_start = 0x100000000ULL;
+ iommu->geometry.aperture_end = 0x1ffffffffULL;
+
+ aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
+ if (IS_ERR(aspace)) {
+ iommu_domain_free(iommu);
+ DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
+ PTR_ERR(aspace));
+ return aspace;
+ }
+
+ ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
+ if (ret) {
+ msm_gem_address_space_put(aspace);
+ return ERR_PTR(ret);
+ }
+
+ msm_mmu_set_fault_handler(aspace->mmu, gpu, a6xx_fault_handler);
+ return aspace;
+}
+
static const struct adreno_gpu_funcs funcs = {
.base = {
.get_param = adreno_get_param,
@@ -803,6 +835,7 @@ static const struct adreno_gpu_funcs funcs = {
.gpu_set_freq = a6xx_gmu_set_freq,
.gpu_state_get = a6xx_gpu_state_get,
.gpu_state_put = a6xx_gpu_state_put,
+ .create_address_space = a6xx_create_address_space,
},
.get_timestamp = a6xx_get_timestamp,
};
@@ -845,9 +878,5 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
return ERR_PTR(ret);
}
- if (gpu->aspace)
- msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
- a6xx_fault_handler);
-
return gpu;
}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 2cfee1a..dc9ea82 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -765,13 +765,6 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
adreno_gpu->rev = config->rev;
adreno_gpu_config.ioname = "kgsl_3d0_reg_memory";
-
- adreno_gpu_config.va_start = SZ_16M;
- adreno_gpu_config.va_end = 0xffffffff;
- /* maximum range of a2xx mmu */
- if (adreno_is_a2xx(adreno_gpu))
- adreno_gpu_config.va_end = SZ_16M + 0xfff * SZ_64K;
-
adreno_gpu_config.nr_rings = nr_rings;
adreno_get_pwrlevels(&pdev->dev, gpu);
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 5e21d01..777f5fb 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -21,6 +21,7 @@
#include <linux/kref.h>
#include <linux/reservation.h>
#include "msm_drv.h"
+#include "msm_mmu.h"
/* Additional internal-use only BO flags: */
#define MSM_BO_STOLEN 0x10000000 /* try to use stolen/splash memory */
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 79b71b1..ec48bb3 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -20,7 +20,6 @@
#include "msm_mmu.h"
#include "msm_fence.h"
#include "msm_gpu_trace.h"
-#include "adreno/adreno_gpu.h"
#include <generated/utsrelease.h>
#include <linux/string_helpers.h>
@@ -821,51 +820,6 @@ static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
return 0;
}
-static struct msm_gem_address_space *
-msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev,
- uint64_t va_start, uint64_t va_end)
-{
- struct msm_gem_address_space *aspace;
- int ret;
-
- /*
- * Setup IOMMU.. eventually we will (I think) do this once per context
- * and have separate page tables per context. For now, to keep things
- * simple and to get something working, just use a single address space:
- */
- if (!adreno_is_a2xx(to_adreno_gpu(gpu))) {
- struct iommu_domain *iommu = iommu_domain_alloc(&platform_bus_type);
- if (!iommu)
- return NULL;
-
- iommu->geometry.aperture_start = va_start;
- iommu->geometry.aperture_end = va_end;
-
- DRM_DEV_INFO(gpu->dev->dev, "%s: using IOMMU\n", gpu->name);
-
- aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu");
- if (IS_ERR(aspace))
- iommu_domain_free(iommu);
- } else {
- aspace = msm_gem_address_space_create_a2xx(&pdev->dev, gpu, "gpu",
- va_start, va_end);
- }
-
- if (IS_ERR(aspace)) {
- DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
- PTR_ERR(aspace));
- return ERR_CAST(aspace);
- }
-
- ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
- if (ret) {
- msm_gem_address_space_put(aspace);
- return ERR_PTR(ret);
- }
-
- return aspace;
-}
-
int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
const char *name, struct msm_gpu_config *config)
@@ -938,12 +892,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
msm_devfreq_init(gpu);
- gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
- config->va_start, config->va_end);
-
- if (gpu->aspace == NULL)
- DRM_DEV_INFO(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
- else if (IS_ERR(gpu->aspace)) {
+ gpu->aspace = gpu->funcs->create_address_space(gpu);
+ if (IS_ERR(gpu->aspace)) {
ret = PTR_ERR(gpu->aspace);
goto fail;
}
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index ca17086..81b9861 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -74,6 +74,8 @@ struct msm_gpu_funcs {
int (*gpu_state_put)(struct msm_gpu_state *state);
unsigned long (*gpu_get_freq)(struct msm_gpu *gpu);
void (*gpu_set_freq)(struct msm_gpu *gpu, unsigned long freq);
+ struct msm_gem_address_space *(*create_address_space)
+ (struct msm_gpu *gpu);
};
struct msm_gpu {
--
2.7.4
^ permalink raw reply related [flat|nested] 46+ messages in thread[parent not found: <1551469117-3404-10-git-send-email-jcrouse-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>]
* Re: [RFC PATCH v1 09/15] drm/msm/gpu: Move address space setup to the GPU targets
2019-03-01 19:38 ` Jordan Crouse
@ 2019-03-01 23:01 ` Jonathan Marek
-1 siblings, 0 replies; 46+ messages in thread
From: Jonathan Marek @ 2019-03-01 23:01 UTC (permalink / raw)
To: Jordan Crouse, freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Kees Cook, jean-philippe.brucker-5wv7dgnIgG8,
linux-arm-msm-u79uwXL29TY76Z2rM5mHXA, Sharat Masetty,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Bjorn Andersson,
David Airlie, Rob Clark, dianders-uWgjrcJnOmJ4cg9Nei1l7Q,
hoegsberg-hpIqsD4AKlfQT0dZR+AlfA, Mamta Shukla, Thomas Zimmermann,
Daniel Vetter, Sean Paul, baolu.lu-VuQAYsv1563Yd54FQh9/CA
There is an error in the a2xx part of this patch: the va_end multiplier
0xfff in adreno_gpu.c became 0xff in a2xx_gpu.c.
On 3/1/19 2:38 PM, Jordan Crouse wrote:
> Move the address space setup code out of the generic msm GPU code
> to the individual GPU targets. This allows us to do target specific
> setup such as gpummu for a2xx or split pagetables and per-instance
> pagetables for newer a5xx and a6xx targets. All this is at the
> expense of duplicated code in some of the target files but I think
> it pays for itself in improved code flow and flexibility.
>
> Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
> ---
>
> drivers/gpu/drm/msm/adreno/a2xx_gpu.c | 37 ++++++++++++++++------
> drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 50 ++++++++++++++++++++++--------
> drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 51 +++++++++++++++++++++++--------
> drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 37 +++++++++++++++++++---
> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 37 +++++++++++++++++++---
> drivers/gpu/drm/msm/adreno/adreno_gpu.c | 7 -----
> drivers/gpu/drm/msm/msm_gem.h | 1 +
> drivers/gpu/drm/msm/msm_gpu.c | 54 ++-------------------------------
> drivers/gpu/drm/msm/msm_gpu.h | 2 ++
> 9 files changed, 173 insertions(+), 103 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
> index 1f83bc1..49241d0 100644
> --- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
> @@ -401,6 +401,30 @@ static struct msm_gpu_state *a2xx_gpu_state_get(struct msm_gpu *gpu)
> return state;
> }
>
> +static struct msm_gem_address_space *
> +a2xx_create_address_space(struct msm_gpu *gpu)
> +{
> + struct msm_gem_address_space *aspace;
> + int ret;
> +
> + aspace = msm_gem_address_space_create_a2xx(&gpu->pdev->dev, gpu,
> + "gpu", SZ_16M, SZ_16M + 0xff * SZ_64K);
> + if (IS_ERR(aspace)) {
> + DRM_DEV_ERROR(gpu->dev->dev,
> + "No memory protection without MMU\n");
> + return ERR_PTR(-ENXIO);
> + }
> +
> + ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
> + if (ret) {
> + msm_gem_address_space_put(aspace);
> + return ERR_PTR(ret);
> + }
> +
> + return aspace;
> +}
> +
> +
> /* Register offset defines for A2XX - copy of A3XX */
> static const unsigned int a2xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
> REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
> @@ -429,6 +453,7 @@ static const struct adreno_gpu_funcs funcs = {
> #endif
> .gpu_state_get = a2xx_gpu_state_get,
> .gpu_state_put = adreno_gpu_state_put,
> + .create_address_space = a2xx_create_address_space,
> },
> };
>
> @@ -473,16 +498,8 @@ struct msm_gpu *a2xx_gpu_init(struct drm_device *dev)
> adreno_gpu->reg_offsets = a2xx_register_offsets;
>
> ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
> - if (ret)
> - goto fail;
> -
> - if (!gpu->aspace) {
> - dev_err(dev->dev, "No memory protection without MMU\n");
> - ret = -ENXIO;
> - goto fail;
> - }
> -
> - return gpu;
> + if (!ret)
> + return gpu;
>
> fail:
> if (a2xx_gpu)
> diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
> index c3b4bc6..33ab5e8 100644
> --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
> @@ -21,6 +21,7 @@
> # include <mach/ocmem.h>
> #endif
>
> +#include "msm_gem.h"
> #include "a3xx_gpu.h"
>
> #define A3XX_INT0_MASK \
> @@ -433,6 +434,41 @@ static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
> return state;
> }
>
> +static struct msm_gem_address_space *
> +a3xx_create_address_space(struct msm_gpu *gpu)
> +{
> + struct msm_gem_address_space *aspace;
> + struct iommu_domain *iommu;
> + int ret;
> +
> + iommu = iommu_domain_alloc(&platform_bus_type);
> + if (!iommu) {
> + DRM_DEV_ERROR(gpu->dev->dev,
> + "No memory protection without IOMMU\n");
> + return ERR_PTR(-ENXIO);
> + }
> +
> + iommu->geometry.aperture_start = SZ_16M;
> + iommu->geometry.aperture_end = 0xffffffff;
> +
> + aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
> + if (IS_ERR(aspace)) {
> + iommu_domain_free(iommu);
> + DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
> + PTR_ERR(aspace));
> + return aspace;
> + }
> +
> + ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
> + if (ret) {
> + msm_gem_address_space_put(aspace);
> + return ERR_PTR(ret);
> + }
> +
> + return aspace;
> +}
> +
> +
> /* Register offset defines for A3XX */
> static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
> REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
> @@ -461,6 +497,7 @@ static const struct adreno_gpu_funcs funcs = {
> #endif
> .gpu_state_get = a3xx_gpu_state_get,
> .gpu_state_put = adreno_gpu_state_put,
> + .create_address_space = a3xx_create_address_space,
> },
> };
>
> @@ -520,19 +557,6 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
> #endif
> }
>
> - if (!gpu->aspace) {
> - /* TODO we think it is possible to configure the GPU to
> - * restrict access to VRAM carveout. But the required
> - * registers are unknown. For now just bail out and
> - * limp along with just modesetting. If it turns out
> - * to not be possible to restrict access, then we must
> - * implement a cmdstream validator.
> - */
> - DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
> - ret = -ENXIO;
> - goto fail;
> - }
> -
> return gpu;
>
> fail:
> diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
> index 18f9a8e..08a5729 100644
> --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
> @@ -15,6 +15,8 @@
> # include <soc/qcom/ocmem.h>
> #endif
>
> +#include "msm_gem.h"
> +
> #define A4XX_INT0_MASK \
> (A4XX_INT0_RBBM_AHB_ERROR | \
> A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
> @@ -530,6 +532,41 @@ static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
> return 0;
> }
>
> +static struct msm_gem_address_space *
> +a4xx_create_address_space(struct msm_gpu *gpu)
> +{
> + struct msm_gem_address_space *aspace;
> + struct iommu_domain *iommu;
> + int ret;
> +
> + iommu = iommu_domain_alloc(&platform_bus_type);
> + if (!iommu) {
> + DRM_DEV_ERROR(gpu->dev->dev,
> + "No memory protection without IOMMU\n");
> + return ERR_PTR(-ENXIO);
> + }
> +
> + iommu->geometry.aperture_start = SZ_16M;
> + iommu->geometry.aperture_end = 0xffffffff;
> +
> + aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
> + if (IS_ERR(aspace)) {
> + iommu_domain_free(iommu);
> + DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
> + PTR_ERR(aspace));
> + return aspace;
> + }
> +
> + ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
> + if (ret) {
> + msm_gem_address_space_put(aspace);
> + return ERR_PTR(ret);
> + }
> +
> + return aspace;
> +}
> +
> +
> static const struct adreno_gpu_funcs funcs = {
> .base = {
> .get_param = adreno_get_param,
> @@ -547,6 +584,7 @@ static const struct adreno_gpu_funcs funcs = {
> #endif
> .gpu_state_get = a4xx_gpu_state_get,
> .gpu_state_put = adreno_gpu_state_put,
> + .create_address_space = a4xx_create_address_space,
> },
> .get_timestamp = a4xx_get_timestamp,
> };
> @@ -600,19 +638,6 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
> #endif
> }
>
> - if (!gpu->aspace) {
> - /* TODO we think it is possible to configure the GPU to
> - * restrict access to VRAM carveout. But the required
> - * registers are unknown. For now just bail out and
> - * limp along with just modesetting. If it turns out
> - * to not be possible to restrict access, then we must
> - * implement a cmdstream validator.
> - */
> - DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
> - ret = -ENXIO;
> - goto fail;
> - }
> -
> return gpu;
>
> fail:
> diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> index 45662d3..3d6f414 100644
> --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> @@ -1456,6 +1456,38 @@ static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
> return (unsigned long)busy_time;
> }
>
> +static struct msm_gem_address_space *
> +a5xx_create_address_space(struct msm_gpu *gpu)
> +{
> + struct msm_gem_address_space *aspace;
> + struct iommu_domain *iommu;
> + int ret;
> +
> + iommu = iommu_domain_alloc(&platform_bus_type);
> + if (!iommu)
> + return NULL;
> +
> + iommu->geometry.aperture_start = 0x100000000ULL;
> + iommu->geometry.aperture_end = 0x1ffffffffULL;
> +
> + aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
> + if (IS_ERR(aspace)) {
> + iommu_domain_free(iommu);
> + DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
> + PTR_ERR(aspace));
> + return aspace;
> + }
> +
> + ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
> + if (ret) {
> + msm_gem_address_space_put(aspace);
> + return ERR_PTR(ret);
> + }
> +
> + msm_mmu_set_fault_handler(aspace->mmu, gpu, a5xx_fault_handler);
> + return aspace;
> +}
> +
> static const struct adreno_gpu_funcs funcs = {
> .base = {
> .get_param = adreno_get_param,
> @@ -1477,6 +1509,7 @@ static const struct adreno_gpu_funcs funcs = {
> .gpu_busy = a5xx_gpu_busy,
> .gpu_state_get = a5xx_gpu_state_get,
> .gpu_state_put = a5xx_gpu_state_put,
> + .create_address_space = a5xx_create_address_space,
> },
> .get_timestamp = a5xx_get_timestamp,
> };
> @@ -1523,7 +1556,6 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
>
> adreno_gpu->registers = a5xx_registers;
> adreno_gpu->reg_offsets = a5xx_register_offsets;
> -
> a5xx_gpu->lm_leakage = 0x4E001A;
>
> check_speed_bin(&pdev->dev);
> @@ -1534,9 +1566,6 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
> return ERR_PTR(ret);
> }
>
> - if (gpu->aspace)
> - msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
> -
> /* Set up the preemption specific bits and pieces for each ringbuffer */
> a5xx_preempt_init(gpu);
>
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 1c20d59..f2e0800 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -783,6 +783,38 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
> return (unsigned long)busy_time;
> }
>
> +static struct msm_gem_address_space *
> +a6xx_create_address_space(struct msm_gpu *gpu)
> +{
> + struct msm_gem_address_space *aspace;
> + struct iommu_domain *iommu;
> + int ret;
> +
> + iommu = iommu_domain_alloc(&platform_bus_type);
> + if (!iommu)
> + return NULL;
> +
> + iommu->geometry.aperture_start = 0x100000000ULL;
> + iommu->geometry.aperture_end = 0x1ffffffffULL;
> +
> + aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
> + if (IS_ERR(aspace)) {
> + iommu_domain_free(iommu);
> + DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
> + PTR_ERR(aspace));
> + return aspace;
> + }
> +
> + ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
> + if (ret) {
> + msm_gem_address_space_put(aspace);
> + return ERR_PTR(ret);
> + }
> +
> + msm_mmu_set_fault_handler(aspace->mmu, gpu, a6xx_fault_handler);
> + return aspace;
> +}
> +
> static const struct adreno_gpu_funcs funcs = {
> .base = {
> .get_param = adreno_get_param,
> @@ -803,6 +835,7 @@ static const struct adreno_gpu_funcs funcs = {
> .gpu_set_freq = a6xx_gmu_set_freq,
> .gpu_state_get = a6xx_gpu_state_get,
> .gpu_state_put = a6xx_gpu_state_put,
> + .create_address_space = a6xx_create_address_space,
> },
> .get_timestamp = a6xx_get_timestamp,
> };
> @@ -845,9 +878,5 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
> return ERR_PTR(ret);
> }
>
> - if (gpu->aspace)
> - msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
> - a6xx_fault_handler);
> -
> return gpu;
> }
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> index 2cfee1a..dc9ea82 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> @@ -765,13 +765,6 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
> adreno_gpu->rev = config->rev;
>
> adreno_gpu_config.ioname = "kgsl_3d0_reg_memory";
> -
> - adreno_gpu_config.va_start = SZ_16M;
> - adreno_gpu_config.va_end = 0xffffffff;
> - /* maximum range of a2xx mmu */
> - if (adreno_is_a2xx(adreno_gpu))
> - adreno_gpu_config.va_end = SZ_16M + 0xfff * SZ_64K;
> -
> adreno_gpu_config.nr_rings = nr_rings;
>
> adreno_get_pwrlevels(&pdev->dev, gpu);
> diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
> index 5e21d01..777f5fb 100644
> --- a/drivers/gpu/drm/msm/msm_gem.h
> +++ b/drivers/gpu/drm/msm/msm_gem.h
> @@ -21,6 +21,7 @@
> #include <linux/kref.h>
> #include <linux/reservation.h>
> #include "msm_drv.h"
> +#include "msm_mmu.h"
>
> /* Additional internal-use only BO flags: */
> #define MSM_BO_STOLEN 0x10000000 /* try to use stolen/splash memory */
> diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> index 79b71b1..ec48bb3 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.c
> +++ b/drivers/gpu/drm/msm/msm_gpu.c
> @@ -20,7 +20,6 @@
> #include "msm_mmu.h"
> #include "msm_fence.h"
> #include "msm_gpu_trace.h"
> -#include "adreno/adreno_gpu.h"
>
> #include <generated/utsrelease.h>
> #include <linux/string_helpers.h>
> @@ -821,51 +820,6 @@ static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
> return 0;
> }
>
> -static struct msm_gem_address_space *
> -msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev,
> - uint64_t va_start, uint64_t va_end)
> -{
> - struct msm_gem_address_space *aspace;
> - int ret;
> -
> - /*
> - * Setup IOMMU.. eventually we will (I think) do this once per context
> - * and have separate page tables per context. For now, to keep things
> - * simple and to get something working, just use a single address space:
> - */
> - if (!adreno_is_a2xx(to_adreno_gpu(gpu))) {
> - struct iommu_domain *iommu = iommu_domain_alloc(&platform_bus_type);
> - if (!iommu)
> - return NULL;
> -
> - iommu->geometry.aperture_start = va_start;
> - iommu->geometry.aperture_end = va_end;
> -
> - DRM_DEV_INFO(gpu->dev->dev, "%s: using IOMMU\n", gpu->name);
> -
> - aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu");
> - if (IS_ERR(aspace))
> - iommu_domain_free(iommu);
> - } else {
> - aspace = msm_gem_address_space_create_a2xx(&pdev->dev, gpu, "gpu",
> - va_start, va_end);
> - }
> -
> - if (IS_ERR(aspace)) {
> - DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
> - PTR_ERR(aspace));
> - return ERR_CAST(aspace);
> - }
> -
> - ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
> - if (ret) {
> - msm_gem_address_space_put(aspace);
> - return ERR_PTR(ret);
> - }
> -
> - return aspace;
> -}
> -
> int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
> struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
> const char *name, struct msm_gpu_config *config)
> @@ -938,12 +892,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
>
> msm_devfreq_init(gpu);
>
> - gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
> - config->va_start, config->va_end);
> -
> - if (gpu->aspace == NULL)
> - DRM_DEV_INFO(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
> - else if (IS_ERR(gpu->aspace)) {
> + gpu->aspace = gpu->funcs->create_address_space(gpu);
> + if (IS_ERR(gpu->aspace)) {
> ret = PTR_ERR(gpu->aspace);
> goto fail;
> }
> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> index ca17086..81b9861 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.h
> +++ b/drivers/gpu/drm/msm/msm_gpu.h
> @@ -74,6 +74,8 @@ struct msm_gpu_funcs {
> int (*gpu_state_put)(struct msm_gpu_state *state);
> unsigned long (*gpu_get_freq)(struct msm_gpu *gpu);
> void (*gpu_set_freq)(struct msm_gpu *gpu, unsigned long freq);
> + struct msm_gem_address_space *(*create_address_space)
> + (struct msm_gpu *gpu);
> };
>
> struct msm_gpu {
>
_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno
^ permalink raw reply [flat|nested] 46+ messages in thread* Re: [RFC PATCH v1 09/15] drm/msm/gpu: Move address space setup to the GPU targets
@ 2019-03-01 23:01 ` Jonathan Marek
0 siblings, 0 replies; 46+ messages in thread
From: Jonathan Marek @ 2019-03-01 23:01 UTC (permalink / raw)
To: Jordan Crouse, freedreno
Cc: jean-philippe.brucker, linux-arm-msm, dianders, hoegsberg,
baolu.lu, Bjorn Andersson, Kees Cook, Thomas Zimmermann,
Sean Paul, Sharat Masetty, dri-devel, linux-kernel, Rob Clark,
David Airlie, Mamta Shukla, Daniel Vetter
There is an error in the a2xx part of this patch: 0xfff in adreno_gpu.c
became 0xff in a2xx_gpu.c
On 3/1/19 2:38 PM, Jordan Crouse wrote:
> Move the address space setup code out of the generic msm GPU code
> to the individual GPU targets. This allows us to do target specific
> setup such as gpummu for a2xx or split pagetables and per-instance
> pagetables for newer a5xx and a6xx targets. All this is at the
> expense of duplicated code in some of the target files but I think
> it pays for itself in improved code flow and flexibility.
>
> Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
> ---
>
> drivers/gpu/drm/msm/adreno/a2xx_gpu.c | 37 ++++++++++++++++------
> drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 50 ++++++++++++++++++++++--------
> drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 51 +++++++++++++++++++++++--------
> drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 37 +++++++++++++++++++---
> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 37 +++++++++++++++++++---
> drivers/gpu/drm/msm/adreno/adreno_gpu.c | 7 -----
> drivers/gpu/drm/msm/msm_gem.h | 1 +
> drivers/gpu/drm/msm/msm_gpu.c | 54 ++-------------------------------
> drivers/gpu/drm/msm/msm_gpu.h | 2 ++
> 9 files changed, 173 insertions(+), 103 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
> index 1f83bc1..49241d0 100644
> --- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
> @@ -401,6 +401,30 @@ static struct msm_gpu_state *a2xx_gpu_state_get(struct msm_gpu *gpu)
> return state;
> }
>
> +static struct msm_gem_address_space *
> +a2xx_create_address_space(struct msm_gpu *gpu)
> +{
> + struct msm_gem_address_space *aspace;
> + int ret;
> +
> + aspace = msm_gem_address_space_create_a2xx(&gpu->pdev->dev, gpu,
> + "gpu", SZ_16M, SZ_16M + 0xff * SZ_64K);
> + if (IS_ERR(aspace)) {
> + DRM_DEV_ERROR(gpu->dev->dev,
> + "No memory protection without MMU\n");
> + return ERR_PTR(-ENXIO);
> + }
> +
> + ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
> + if (ret) {
> + msm_gem_address_space_put(aspace);
> + return ERR_PTR(ret);
> + }
> +
> + return aspace;
> +}
> +
> +
> /* Register offset defines for A2XX - copy of A3XX */
> static const unsigned int a2xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
> REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
> @@ -429,6 +453,7 @@ static const struct adreno_gpu_funcs funcs = {
> #endif
> .gpu_state_get = a2xx_gpu_state_get,
> .gpu_state_put = adreno_gpu_state_put,
> + .create_address_space = a2xx_create_address_space,
> },
> };
>
> @@ -473,16 +498,8 @@ struct msm_gpu *a2xx_gpu_init(struct drm_device *dev)
> adreno_gpu->reg_offsets = a2xx_register_offsets;
>
> ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
> - if (ret)
> - goto fail;
> -
> - if (!gpu->aspace) {
> - dev_err(dev->dev, "No memory protection without MMU\n");
> - ret = -ENXIO;
> - goto fail;
> - }
> -
> - return gpu;
> + if (!ret)
> + return gpu;
>
> fail:
> if (a2xx_gpu)
> diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
> index c3b4bc6..33ab5e8 100644
> --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
> @@ -21,6 +21,7 @@
> # include <mach/ocmem.h>
> #endif
>
> +#include "msm_gem.h"
> #include "a3xx_gpu.h"
>
> #define A3XX_INT0_MASK \
> @@ -433,6 +434,41 @@ static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
> return state;
> }
>
> +static struct msm_gem_address_space *
> +a3xx_create_address_space(struct msm_gpu *gpu)
> +{
> + struct msm_gem_address_space *aspace;
> + struct iommu_domain *iommu;
> + int ret;
> +
> + iommu = iommu_domain_alloc(&platform_bus_type);
> + if (!iommu) {
> + DRM_DEV_ERROR(gpu->dev->dev,
> + "No memory protection without IOMMU\n");
> + return ERR_PTR(-ENXIO);
> + }
> +
> + iommu->geometry.aperture_start = SZ_16M;
> + iommu->geometry.aperture_end = 0xffffffff;
> +
> + aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
> + if (IS_ERR(aspace)) {
> + iommu_domain_free(iommu);
> + DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
> + PTR_ERR(aspace));
> + return aspace;
> + }
> +
> + ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
> + if (ret) {
> + msm_gem_address_space_put(aspace);
> + return ERR_PTR(ret);
> + }
> +
> + return aspace;
> +}
> +
> +
> /* Register offset defines for A3XX */
> static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
> REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
> @@ -461,6 +497,7 @@ static const struct adreno_gpu_funcs funcs = {
> #endif
> .gpu_state_get = a3xx_gpu_state_get,
> .gpu_state_put = adreno_gpu_state_put,
> + .create_address_space = a3xx_create_address_space,
> },
> };
>
> @@ -520,19 +557,6 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
> #endif
> }
>
> - if (!gpu->aspace) {
> - /* TODO we think it is possible to configure the GPU to
> - * restrict access to VRAM carveout. But the required
> - * registers are unknown. For now just bail out and
> - * limp along with just modesetting. If it turns out
> - * to not be possible to restrict access, then we must
> - * implement a cmdstream validator.
> - */
> - DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
> - ret = -ENXIO;
> - goto fail;
> - }
> -
> return gpu;
>
> fail:
> diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
> index 18f9a8e..08a5729 100644
> --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
> @@ -15,6 +15,8 @@
> # include <soc/qcom/ocmem.h>
> #endif
>
> +#include "msm_gem.h"
> +
> #define A4XX_INT0_MASK \
> (A4XX_INT0_RBBM_AHB_ERROR | \
> A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
> @@ -530,6 +532,41 @@ static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
> return 0;
> }
>
> +static struct msm_gem_address_space *
> +a4xx_create_address_space(struct msm_gpu *gpu)
> +{
> + struct msm_gem_address_space *aspace;
> + struct iommu_domain *iommu;
> + int ret;
> +
> + iommu = iommu_domain_alloc(&platform_bus_type);
> + if (!iommu) {
> + DRM_DEV_ERROR(gpu->dev->dev,
> + "No memory protection without IOMMU\n");
> + return ERR_PTR(-ENXIO);
> + }
> +
> + iommu->geometry.aperture_start = SZ_16M;
> + iommu->geometry.aperture_end = 0xffffffff;
> +
> + aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
> + if (IS_ERR(aspace)) {
> + iommu_domain_free(iommu);
> + DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
> + PTR_ERR(aspace));
> + return aspace;
> + }
> +
> + ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
> + if (ret) {
> + msm_gem_address_space_put(aspace);
> + return ERR_PTR(ret);
> + }
> +
> + return aspace;
> +}
> +
> +
> static const struct adreno_gpu_funcs funcs = {
> .base = {
> .get_param = adreno_get_param,
> @@ -547,6 +584,7 @@ static const struct adreno_gpu_funcs funcs = {
> #endif
> .gpu_state_get = a4xx_gpu_state_get,
> .gpu_state_put = adreno_gpu_state_put,
> + .create_address_space = a4xx_create_address_space,
> },
> .get_timestamp = a4xx_get_timestamp,
> };
> @@ -600,19 +638,6 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
> #endif
> }
>
> - if (!gpu->aspace) {
> - /* TODO we think it is possible to configure the GPU to
> - * restrict access to VRAM carveout. But the required
> - * registers are unknown. For now just bail out and
> - * limp along with just modesetting. If it turns out
> - * to not be possible to restrict access, then we must
> - * implement a cmdstream validator.
> - */
> - DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
> - ret = -ENXIO;
> - goto fail;
> - }
> -
> return gpu;
>
> fail:
> diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> index 45662d3..3d6f414 100644
> --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> @@ -1456,6 +1456,38 @@ static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
> return (unsigned long)busy_time;
> }
>
> +static struct msm_gem_address_space *
> +a5xx_create_address_space(struct msm_gpu *gpu)
> +{
> + struct msm_gem_address_space *aspace;
> + struct iommu_domain *iommu;
> + int ret;
> +
> + iommu = iommu_domain_alloc(&platform_bus_type);
> + if (!iommu)
> + return NULL;
> +
> + iommu->geometry.aperture_start = 0x100000000ULL;
> + iommu->geometry.aperture_end = 0x1ffffffffULL;
> +
> + aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
> + if (IS_ERR(aspace)) {
> + iommu_domain_free(iommu);
> + DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
> + PTR_ERR(aspace));
> + return aspace;
> + }
> +
> + ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
> + if (ret) {
> + msm_gem_address_space_put(aspace);
> + return ERR_PTR(ret);
> + }
> +
> + msm_mmu_set_fault_handler(aspace->mmu, gpu, a5xx_fault_handler);
> + return aspace;
> +}
> +
> static const struct adreno_gpu_funcs funcs = {
> .base = {
> .get_param = adreno_get_param,
> @@ -1477,6 +1509,7 @@ static const struct adreno_gpu_funcs funcs = {
> .gpu_busy = a5xx_gpu_busy,
> .gpu_state_get = a5xx_gpu_state_get,
> .gpu_state_put = a5xx_gpu_state_put,
> + .create_address_space = a5xx_create_address_space,
> },
> .get_timestamp = a5xx_get_timestamp,
> };
> @@ -1523,7 +1556,6 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
>
> adreno_gpu->registers = a5xx_registers;
> adreno_gpu->reg_offsets = a5xx_register_offsets;
> -
> a5xx_gpu->lm_leakage = 0x4E001A;
>
> check_speed_bin(&pdev->dev);
> @@ -1534,9 +1566,6 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
> return ERR_PTR(ret);
> }
>
> - if (gpu->aspace)
> - msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
> -
> /* Set up the preemption specific bits and pieces for each ringbuffer */
> a5xx_preempt_init(gpu);
>
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 1c20d59..f2e0800 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -783,6 +783,38 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
> return (unsigned long)busy_time;
> }
>
> +static struct msm_gem_address_space *
> +a6xx_create_address_space(struct msm_gpu *gpu)
> +{
> + struct msm_gem_address_space *aspace;
> + struct iommu_domain *iommu;
> + int ret;
> +
> + iommu = iommu_domain_alloc(&platform_bus_type);
> + if (!iommu)
> + return NULL;
> +
> + iommu->geometry.aperture_start = 0x100000000ULL;
> + iommu->geometry.aperture_end = 0x1ffffffffULL;
> +
> + aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
> + if (IS_ERR(aspace)) {
> + iommu_domain_free(iommu);
> + DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
> + PTR_ERR(aspace));
> + return aspace;
> + }
> +
> + ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
> + if (ret) {
> + msm_gem_address_space_put(aspace);
> + return ERR_PTR(ret);
> + }
> +
> + msm_mmu_set_fault_handler(aspace->mmu, gpu, a6xx_fault_handler);
> + return aspace;
> +}
> +
> static const struct adreno_gpu_funcs funcs = {
> .base = {
> .get_param = adreno_get_param,
> @@ -803,6 +835,7 @@ static const struct adreno_gpu_funcs funcs = {
> .gpu_set_freq = a6xx_gmu_set_freq,
> .gpu_state_get = a6xx_gpu_state_get,
> .gpu_state_put = a6xx_gpu_state_put,
> + .create_address_space = a6xx_create_address_space,
> },
> .get_timestamp = a6xx_get_timestamp,
> };
> @@ -845,9 +878,5 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
> return ERR_PTR(ret);
> }
>
> - if (gpu->aspace)
> - msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
> - a6xx_fault_handler);
> -
> return gpu;
> }
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> index 2cfee1a..dc9ea82 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> @@ -765,13 +765,6 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
> adreno_gpu->rev = config->rev;
>
> adreno_gpu_config.ioname = "kgsl_3d0_reg_memory";
> -
> - adreno_gpu_config.va_start = SZ_16M;
> - adreno_gpu_config.va_end = 0xffffffff;
> - /* maximum range of a2xx mmu */
> - if (adreno_is_a2xx(adreno_gpu))
> - adreno_gpu_config.va_end = SZ_16M + 0xfff * SZ_64K;
> -
> adreno_gpu_config.nr_rings = nr_rings;
>
> adreno_get_pwrlevels(&pdev->dev, gpu);
> diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
> index 5e21d01..777f5fb 100644
> --- a/drivers/gpu/drm/msm/msm_gem.h
> +++ b/drivers/gpu/drm/msm/msm_gem.h
> @@ -21,6 +21,7 @@
> #include <linux/kref.h>
> #include <linux/reservation.h>
> #include "msm_drv.h"
> +#include "msm_mmu.h"
>
> /* Additional internal-use only BO flags: */
> #define MSM_BO_STOLEN 0x10000000 /* try to use stolen/splash memory */
> diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> index 79b71b1..ec48bb3 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.c
> +++ b/drivers/gpu/drm/msm/msm_gpu.c
> @@ -20,7 +20,6 @@
> #include "msm_mmu.h"
> #include "msm_fence.h"
> #include "msm_gpu_trace.h"
> -#include "adreno/adreno_gpu.h"
>
> #include <generated/utsrelease.h>
> #include <linux/string_helpers.h>
> @@ -821,51 +820,6 @@ static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
> return 0;
> }
>
> -static struct msm_gem_address_space *
> -msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev,
> - uint64_t va_start, uint64_t va_end)
> -{
> - struct msm_gem_address_space *aspace;
> - int ret;
> -
> - /*
> - * Setup IOMMU.. eventually we will (I think) do this once per context
> - * and have separate page tables per context. For now, to keep things
> - * simple and to get something working, just use a single address space:
> - */
> - if (!adreno_is_a2xx(to_adreno_gpu(gpu))) {
> - struct iommu_domain *iommu = iommu_domain_alloc(&platform_bus_type);
> - if (!iommu)
> - return NULL;
> -
> - iommu->geometry.aperture_start = va_start;
> - iommu->geometry.aperture_end = va_end;
> -
> - DRM_DEV_INFO(gpu->dev->dev, "%s: using IOMMU\n", gpu->name);
> -
> - aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu");
> - if (IS_ERR(aspace))
> - iommu_domain_free(iommu);
> - } else {
> - aspace = msm_gem_address_space_create_a2xx(&pdev->dev, gpu, "gpu",
> - va_start, va_end);
> - }
> -
> - if (IS_ERR(aspace)) {
> - DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
> - PTR_ERR(aspace));
> - return ERR_CAST(aspace);
> - }
> -
> - ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
> - if (ret) {
> - msm_gem_address_space_put(aspace);
> - return ERR_PTR(ret);
> - }
> -
> - return aspace;
> -}
> -
> int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
> struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
> const char *name, struct msm_gpu_config *config)
> @@ -938,12 +892,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
>
> msm_devfreq_init(gpu);
>
> - gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
> - config->va_start, config->va_end);
> -
> - if (gpu->aspace == NULL)
> - DRM_DEV_INFO(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
> - else if (IS_ERR(gpu->aspace)) {
> + gpu->aspace = gpu->funcs->create_address_space(gpu);
> + if (IS_ERR(gpu->aspace)) {
> ret = PTR_ERR(gpu->aspace);
> goto fail;
> }
> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> index ca17086..81b9861 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.h
> +++ b/drivers/gpu/drm/msm/msm_gpu.h
> @@ -74,6 +74,8 @@ struct msm_gpu_funcs {
> int (*gpu_state_put)(struct msm_gpu_state *state);
> unsigned long (*gpu_get_freq)(struct msm_gpu *gpu);
> void (*gpu_set_freq)(struct msm_gpu *gpu, unsigned long freq);
> + struct msm_gem_address_space *(*create_address_space)
> + (struct msm_gpu *gpu);
> };
>
> struct msm_gpu {
>
^ permalink raw reply [flat|nested] 46+ messages in thread
* [RFC PATCH v1 11/15] drm/msm: Add a helper function for a per-instance address space
2019-03-01 19:38 ` Jordan Crouse
@ 2019-03-01 19:38 ` Jordan Crouse
-1 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: jean-philippe.brucker-5wv7dgnIgG8,
linux-arm-msm-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, David Airlie,
Rob Clark, dianders-uWgjrcJnOmJ4cg9Nei1l7Q,
hoegsberg-hpIqsD4AKlfQT0dZR+AlfA, Daniel Vetter, Sean Paul,
baolu.lu-VuQAYsv1563Yd54FQh9/CA
Add a helper function to create a GEM address space attached to
an iommu auxiliary domain for a per-instance pagetable.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/gpu/drm/msm/msm_drv.h | 4 +++
drivers/gpu/drm/msm/msm_gem_vma.c | 53 +++++++++++++++++++++++----------------
2 files changed, 36 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 3140e8f..4423be0 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -258,6 +258,10 @@ struct msm_gem_address_space *
msm_gem_address_space_create_a2xx(struct device *dev, struct msm_gpu *gpu,
const char *name, uint64_t va_start, uint64_t va_end);
+struct msm_gem_address_space *
+msm_gem_address_space_create_instance(struct device *dev, const char *name,
+ u64 va_start, u64 va_end);
+
int msm_register_mmu(struct drm_device *dev, struct msm_mmu *mmu);
void msm_unregister_mmu(struct drm_device *dev, struct msm_mmu *mmu);
diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c
index 49c0482..4364bf8 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -136,14 +136,12 @@ int msm_gem_init_vma(struct msm_gem_address_space *aspace,
return 0;
}
-
-struct msm_gem_address_space *
-msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
- const char *name)
+static struct msm_gem_address_space *
+msm_gem_address_space_new(struct msm_mmu *mmu, const char *name,
+ u64 va_start, u64 va_end)
{
struct msm_gem_address_space *aspace;
- u64 size = domain->geometry.aperture_end -
- domain->geometry.aperture_start;
+ u64 size = va_end - va_start;
aspace = kzalloc(sizeof(*aspace), GFP_KERNEL);
if (!aspace)
@@ -151,10 +149,9 @@ msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
spin_lock_init(&aspace->lock);
aspace->name = name;
- aspace->mmu = msm_iommu_new(dev, domain);
+ aspace->mmu = mmu;
- drm_mm_init(&aspace->mm, (domain->geometry.aperture_start >> PAGE_SHIFT),
- size >> PAGE_SHIFT);
+ drm_mm_init(&aspace->mm, (va_start >> PAGE_SHIFT), size >> PAGE_SHIFT);
kref_init(&aspace->kref);
@@ -162,24 +159,38 @@ msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
}
struct msm_gem_address_space *
+msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
+ const char *name)
+{
+ struct msm_mmu *mmu = msm_iommu_new(dev, domain);
+
+ if (IS_ERR(mmu))
+ return ERR_CAST(mmu);
+
+ return msm_gem_address_space_new(mmu, name,
+ domain->geometry.aperture_start, domain->geometry.aperture_end);
+}
+
+struct msm_gem_address_space *
msm_gem_address_space_create_a2xx(struct device *dev, struct msm_gpu *gpu,
const char *name, uint64_t va_start, uint64_t va_end)
{
- struct msm_gem_address_space *aspace;
- u64 size = va_end - va_start;
+ struct msm_mmu *mmu = msm_gpummu_new(dev, gpu);
- aspace = kzalloc(sizeof(*aspace), GFP_KERNEL);
- if (!aspace)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(mmu))
+ return ERR_CAST(mmu);
- spin_lock_init(&aspace->lock);
- aspace->name = name;
- aspace->mmu = msm_gpummu_new(dev, gpu);
+ return msm_gem_address_space_new(mmu, name, va_start, va_end);
+}
- drm_mm_init(&aspace->mm, (va_start >> PAGE_SHIFT),
- size >> PAGE_SHIFT);
+struct msm_gem_address_space *
+msm_gem_address_space_create_instance(struct device *dev, const char *name,
+ u64 va_start, u64 va_end)
+{
+ struct msm_mmu *mmu = msm_iommu_new_instance(dev);
- kref_init(&aspace->kref);
+ if (IS_ERR(mmu))
+ return ERR_CAST(mmu);
- return aspace;
+ return msm_gem_address_space_new(mmu, name, va_start, va_end);
}
--
2.7.4
_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno
^ permalink raw reply related	[flat|nested] 46+ messages in thread
* [RFC PATCH v1 11/15] drm/msm: Add a helper function for a per-instance address space
@ 2019-03-01 19:38 ` Jordan Crouse
0 siblings, 0 replies; 46+ messages in thread
From: Jordan Crouse @ 2019-03-01 19:38 UTC (permalink / raw)
To: freedreno
Cc: jean-philippe.brucker, linux-arm-msm, dianders, hoegsberg,
baolu.lu, Sean Paul, linux-kernel, dri-devel, Rob Clark,
David Airlie, Daniel Vetter
Add a helper function to create a GEM address space attached to
an iommu auxiliary domain for a per-instance pagetable.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/gpu/drm/msm/msm_drv.h | 4 +++
drivers/gpu/drm/msm/msm_gem_vma.c | 53 +++++++++++++++++++++++----------------
2 files changed, 36 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 3140e8f..4423be0 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -258,6 +258,10 @@ struct msm_gem_address_space *
msm_gem_address_space_create_a2xx(struct device *dev, struct msm_gpu *gpu,
const char *name, uint64_t va_start, uint64_t va_end);
+struct msm_gem_address_space *
+msm_gem_address_space_create_instance(struct device *dev, const char *name,
+ u64 va_start, u64 va_end);
+
int msm_register_mmu(struct drm_device *dev, struct msm_mmu *mmu);
void msm_unregister_mmu(struct drm_device *dev, struct msm_mmu *mmu);
diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c
index 49c0482..4364bf8 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -136,14 +136,12 @@ int msm_gem_init_vma(struct msm_gem_address_space *aspace,
return 0;
}
-
-struct msm_gem_address_space *
-msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
- const char *name)
+static struct msm_gem_address_space *
+msm_gem_address_space_new(struct msm_mmu *mmu, const char *name,
+ u64 va_start, u64 va_end)
{
struct msm_gem_address_space *aspace;
- u64 size = domain->geometry.aperture_end -
- domain->geometry.aperture_start;
+ u64 size = va_end - va_start;
aspace = kzalloc(sizeof(*aspace), GFP_KERNEL);
if (!aspace)
@@ -151,10 +149,9 @@ msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
spin_lock_init(&aspace->lock);
aspace->name = name;
- aspace->mmu = msm_iommu_new(dev, domain);
+ aspace->mmu = mmu;
- drm_mm_init(&aspace->mm, (domain->geometry.aperture_start >> PAGE_SHIFT),
- size >> PAGE_SHIFT);
+ drm_mm_init(&aspace->mm, (va_start >> PAGE_SHIFT), size >> PAGE_SHIFT);
kref_init(&aspace->kref);
@@ -162,24 +159,38 @@ msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
}
struct msm_gem_address_space *
+msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
+ const char *name)
+{
+ struct msm_mmu *mmu = msm_iommu_new(dev, domain);
+
+ if (IS_ERR(mmu))
+ return ERR_CAST(mmu);
+
+ return msm_gem_address_space_new(mmu, name,
+ domain->geometry.aperture_start, domain->geometry.aperture_end);
+}
+
+struct msm_gem_address_space *
msm_gem_address_space_create_a2xx(struct device *dev, struct msm_gpu *gpu,
const char *name, uint64_t va_start, uint64_t va_end)
{
- struct msm_gem_address_space *aspace;
- u64 size = va_end - va_start;
+ struct msm_mmu *mmu = msm_gpummu_new(dev, gpu);
- aspace = kzalloc(sizeof(*aspace), GFP_KERNEL);
- if (!aspace)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(mmu))
+ return ERR_CAST(mmu);
- spin_lock_init(&aspace->lock);
- aspace->name = name;
- aspace->mmu = msm_gpummu_new(dev, gpu);
+ return msm_gem_address_space_new(mmu, name, va_start, va_end);
+}
- drm_mm_init(&aspace->mm, (va_start >> PAGE_SHIFT),
- size >> PAGE_SHIFT);
+struct msm_gem_address_space *
+msm_gem_address_space_create_instance(struct device *dev, const char *name,
+ u64 va_start, u64 va_end)
+{
+ struct msm_mmu *mmu = msm_iommu_new_instance(dev);
- kref_init(&aspace->kref);
+ if (IS_ERR(mmu))
+ return ERR_CAST(mmu);
- return aspace;
+ return msm_gem_address_space_new(mmu, name, va_start, va_end);
}
--
2.7.4
^ permalink raw reply related [flat|nested] 46+ messages in thread