From: Jordan Crouse <jcrouse@codeaurora.org>
To: freedreno@lists.freedesktop.org
Cc: jean-philippe.brucker@arm.com, linux-arm-msm@vger.kernel.org,
hoegsberg@google.com, dianders@chromium.org,
Sean Paul <sean@poorly.run>, Wen Yang <wen.yang99@zte.com.cn>,
Thomas Zimmermann <tzimmermann@suse.de>,
Sharat Masetty <smasetty@codeaurora.org>,
dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org,
Rob Clark <robdclark@gmail.com>, David Airlie <airlied@linux.ie>,
Mamta Shukla <mamtashukla555@gmail.com>,
Daniel Vetter <daniel@ffwll.ch>
Subject: [PATCH v3 16/16] drm/msm/a5xx: Support per-instance pagetables
Date: Wed, 29 May 2019 14:54:52 -0600 [thread overview]
Message-ID: <1559163292-4792-17-git-send-email-jcrouse@codeaurora.org> (raw)
In-Reply-To: <1559163292-4792-1-git-send-email-jcrouse@codeaurora.org>
Add support for per-instance pagetables for 5XX targets. Create a support
buffer for preemption to hold the SMMU pagetable information for a
preempted ring, enable TTBR1 to support split pagetables and add the
necessary PM4 commands to trigger a pagetable switch at the beginning
of a user command.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 120 +++++++++++++++++++++++++++++-
drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 19 +++++
drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 70 +++++++++++++----
3 files changed, 192 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 2f87c3e..fedd470 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -111,6 +111,59 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit
msm_gpu_retire(gpu);
}
+static void a5xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+ struct msm_file_private *ctx)
+{
+ u64 ttbr;
+ u32 asid;
+
+ if (!msm_iommu_get_ptinfo(ctx->aspace->mmu, &ttbr, &asid))
+ return;
+
+ ttbr = ttbr | ((u64) asid) << 48;
+
+ /* Turn off protected mode */
+ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+ OUT_RING(ring, 0);
+
+ /* Turn on APRIV mode to access critical regions */
+ OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1);
+ OUT_RING(ring, 1);
+
+ /* Make sure the ME is synchronized before starting the update */
+ OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
+
+ /* Execute the table update */
+ OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 3);
+ OUT_RING(ring, lower_32_bits(ttbr));
+ OUT_RING(ring, upper_32_bits(ttbr));
+ OUT_RING(ring, 0);
+
+ /*
+ * Write the new TTBR0 to the preemption records - this will be used to
+ * reload the pagetable if the current ring gets preempted out.
+ */
+ OUT_PKT7(ring, CP_MEM_WRITE, 4);
+ OUT_RING(ring, lower_32_bits(rbmemptr(ring, ttbr0)));
+ OUT_RING(ring, upper_32_bits(rbmemptr(ring, ttbr0)));
+ OUT_RING(ring, lower_32_bits(ttbr));
+ OUT_RING(ring, upper_32_bits(ttbr));
+
+ /* Invalidate the draw state so we start off fresh */
+ OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
+ OUT_RING(ring, 0x40000);
+ OUT_RING(ring, 1);
+ OUT_RING(ring, 0);
+
+ /* Turn off APRIV */
+ OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1);
+ OUT_RING(ring, 0);
+
+ /* Turn protected mode back on */
+ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+ OUT_RING(ring, 1);
+}
+
static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
struct msm_file_private *ctx)
{
@@ -126,6 +179,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
return;
}
+ a5xx_set_pagetable(gpu, ring, ctx);
+
OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
OUT_RING(ring, 0x02);
@@ -1349,21 +1404,77 @@ static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
return (unsigned long)busy_time;
}
+static struct msm_gem_address_space *a5xx_new_address_space(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ struct msm_gem_address_space *aspace;
+ int ret;
+
+ /* Return the default pagetable if per instance tables don't work */
+ if (!a5xx_gpu->per_instance_tables)
+ return gpu->aspace;
+
+ aspace = msm_gem_address_space_create_instance(&gpu->pdev->dev,
+ "gpu", 0x100000000ULL, 0x1ffffffffULL);
+ if (IS_ERR(aspace))
+ return aspace;
+
+ ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
+ if (ret) {
+ /* -ENODEV means that aux domains aren't supported */
+ if (ret == -ENODEV)
+ return gpu->aspace;
+
+ return ERR_PTR(ret);
+ }
+
+ return aspace;
+}
+
static struct msm_gem_address_space *
a5xx_create_address_space(struct msm_gpu *gpu)
{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ struct device *dev = &gpu->pdev->dev;
struct msm_gem_address_space *aspace;
struct iommu_domain *iommu;
- int ret;
+ int ret, val = 1;
+
+ a5xx_gpu->per_instance_tables = false;
iommu = iommu_domain_alloc(&platform_bus_type);
if (!iommu)
return ERR_PTR(-ENXIO);
- iommu->geometry.aperture_start = 0x100000000ULL;
- iommu->geometry.aperture_end = 0x1ffffffffULL;
+ /* Try to enable split pagetables */
+ if (iommu_domain_set_attr(iommu, DOMAIN_ATTR_SPLIT_TABLES, &val)) {
+ /*
+ * If split pagetables aren't available we won't be able to do
+ * per-instance pagetables so set up the global va space at our
+ * usual location
+ */
+ iommu->geometry.aperture_start = 0x100000000ULL;
+ iommu->geometry.aperture_end = 0x1ffffffffULL;
+ } else {
+ /*
+ * If split pagetables are available then we might be able to do
+ * per-instance pagetables. Put the default va-space in TTBR1 to
+ * prepare
+ */
+ iommu->geometry.aperture_start = 0xfffffff100000000ULL;
+ iommu->geometry.aperture_end = 0xfffffff1ffffffffULL;
+
+ /*
+ * If both split pagetables and aux domains are supported we can
+ * do per-instance pagetables
+ */
+ a5xx_gpu->per_instance_tables =
+ iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX);
+ }
- aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
+ aspace = msm_gem_address_space_create(dev, iommu, "gpu");
if (IS_ERR(aspace)) {
iommu_domain_free(iommu);
DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
@@ -1403,6 +1514,7 @@ static const struct adreno_gpu_funcs funcs = {
.gpu_state_get = a5xx_gpu_state_get,
.gpu_state_put = a5xx_gpu_state_put,
.create_address_space = a5xx_create_address_space,
+ .new_address_space = a5xx_new_address_space,
},
.get_timestamp = a5xx_get_timestamp,
};
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
index 7d71860..82ceb9b 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
@@ -45,6 +45,11 @@ struct a5xx_gpu {
atomic_t preempt_state;
struct timer_list preempt_timer;
+ struct a5xx_smmu_info *smmu_info;
+ struct drm_gem_object *smmu_info_bo;
+ uint64_t smmu_info_iova;
+
+ bool per_instance_tables;
};
#define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base)
@@ -132,6 +137,20 @@ struct a5xx_preempt_record {
*/
#define A5XX_PREEMPT_COUNTER_SIZE (16 * 4)
+/*
+ * This is a global structure that the preemption code uses to switch in the
+ * pagetable for the preempted process - the code switches in whatever we
+ * store here after preempting in a new ring.
+ */
+struct a5xx_smmu_info {
+ uint32_t magic;
+ uint32_t _pad4;
+ uint64_t ttbr0;
+ uint32_t asid;
+ uint32_t contextidr;
+};
+
+#define A5XX_SMMU_INFO_MAGIC 0x3618CDA3UL
int a5xx_power_init(struct msm_gpu *gpu);
void a5xx_gpmu_ucode_init(struct msm_gpu *gpu);
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
index 3d62310..1050409 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
@@ -12,6 +12,7 @@
*/
#include "msm_gem.h"
+#include "msm_mmu.h"
#include "a5xx_gpu.h"
/*
@@ -145,6 +146,15 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu)
a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring);
spin_unlock_irqrestore(&ring->lock, flags);
+ /* Do read barrier to make sure we have updated pagetable info */
+ rmb();
+
+ /* Set the SMMU info for the preemption */
+ if (a5xx_gpu->smmu_info) {
+ a5xx_gpu->smmu_info->ttbr0 = ring->memptrs->ttbr0;
+ a5xx_gpu->smmu_info->contextidr = 0;
+ }
+
/* Set the address of the incoming preemption record */
gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
@@ -221,9 +231,10 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu)
a5xx_gpu->preempt[i]->rbase = gpu->rb[i]->iova;
}
- /* Write a 0 to signal that we aren't switching pagetables */
+ /* Tell the CP where to find the smmu_info buffer */
gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
- REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0);
+ REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+ a5xx_gpu->smmu_info_iova);
/* Reset the preemption state */
set_preempt_state(a5xx_gpu, PREEMPT_NONE);
@@ -271,6 +282,34 @@ void a5xx_preempt_fini(struct msm_gpu *gpu)
for (i = 0; i < gpu->nr_rings; i++)
msm_gem_kernel_put(a5xx_gpu->preempt_bo[i], gpu->aspace, true);
+
+ msm_gem_kernel_put(a5xx_gpu->smmu_info_bo, gpu->aspace, true);
+}
+
+static int a5xx_smmu_info_init(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ struct a5xx_smmu_info *ptr;
+ struct drm_gem_object *bo;
+ u64 iova;
+
+ if (!a5xx_gpu->per_instance_tables)
+ return 0;
+
+ ptr = msm_gem_kernel_new(gpu->dev, sizeof(struct a5xx_smmu_info),
+ MSM_BO_UNCACHED, gpu->aspace, &bo, &iova);
+
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+
+ ptr->magic = A5XX_SMMU_INFO_MAGIC;
+
+ a5xx_gpu->smmu_info_bo = bo;
+ a5xx_gpu->smmu_info_iova = iova;
+ a5xx_gpu->smmu_info = ptr;
+
+ return 0;
}
void a5xx_preempt_init(struct msm_gpu *gpu)
@@ -284,17 +323,22 @@ void a5xx_preempt_init(struct msm_gpu *gpu)
return;
for (i = 0; i < gpu->nr_rings; i++) {
- if (preempt_init_ring(a5xx_gpu, gpu->rb[i])) {
- /*
- * On any failure our adventure is over. Clean up and
- * set nr_rings to 1 to force preemption off
- */
- a5xx_preempt_fini(gpu);
- gpu->nr_rings = 1;
-
- return;
- }
+ if (preempt_init_ring(a5xx_gpu, gpu->rb[i]))
+ goto fail;
}
- timer_setup(&a5xx_gpu->preempt_timer, a5xx_preempt_timer, 0);
+ if (a5xx_smmu_info_init(gpu))
+ goto fail;
+
+ timer_setup(&a5xx_gpu->preempt_timer, a5xx_preempt_timer,
+ (unsigned long) a5xx_gpu);
+
+ return;
+fail:
+ /*
+ * On any failure our adventure is over. Clean up and
+ * set nr_rings to 1 to force preemption off
+ */
+ a5xx_preempt_fini(gpu);
+ gpu->nr_rings = 1;
}
--
2.7.4
prev parent reply other threads:[~2019-05-29 20:56 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-05-29 20:54 [PATCH v3 00/16] drm/msm: Per-instance pagetable support Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` [PATCH v3 01/16] iommu/arm-smmu: Allow client devices to select direct mapping Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` [PATCH v3 02/16] iommu: Add DOMAIN_ATTR_SPLIT_TABLES Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` [PATCH v3 03/16] iommu/io-pgtable-arm: Add support for AARCH64 split pagetables Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` [PATCH v3 04/16] iommu/arm-smmu: Add support for DOMAIN_ATTR_SPLIT_TABLES Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` [PATCH v3 05/16] iommu: Add DOMAIN_ATTR_PTBASE Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` [PATCH v3 06/16] iommu/arm-smmu: Add auxiliary domain support for arm-smmuv2 Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` [PATCH v3 07/16] drm/msm/adreno: Enable 64 bit mode by default on a5xx and a6xx targets Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` [PATCH v3 08/16] drm/msm: Print all 64 bits of the faulting IOMMU address Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` [PATCH v3 09/16] drm/msm: Pass the MMU domain index in struct msm_file_private Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` [PATCH v3 10/16] drm/msm/gpu: Move address space setup to the GPU targets Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` [PATCH v3 11/16] drm/msm: Add support for IOMMU auxiliary domains Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` [PATCH v3 12/16] drm/msm: Add a helper function for a per-instance address space Jordan Crouse
2019-05-29 20:54 ` [PATCH v3 13/16] drm/msm: Add support to create target specific address spaces Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` [PATCH v3 14/16] drm/msm/gpu: Add ttbr0 to the memptrs Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` [PATCH v3 15/16] drm/msm/a6xx: Support per-instance pagetables Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse
2019-05-29 20:54 ` Jordan Crouse [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1559163292-4792-17-git-send-email-jcrouse@codeaurora.org \
--to=jcrouse@codeaurora.org \
--cc=airlied@linux.ie \
--cc=daniel@ffwll.ch \
--cc=dianders@chromium.org \
--cc=dri-devel@lists.freedesktop.org \
--cc=freedreno@lists.freedesktop.org \
--cc=hoegsberg@google.com \
--cc=jean-philippe.brucker@arm.com \
--cc=linux-arm-msm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mamtashukla555@gmail.com \
--cc=robdclark@gmail.com \
--cc=sean@poorly.run \
--cc=smasetty@codeaurora.org \
--cc=tzimmermann@suse.de \
--cc=wen.yang99@zte.com.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.