* [PATCH 1/4] drm/amdgpu: further cleanup amdgpu_vm_need_pipeline_sync
@ 2017-05-31 12:38 Christian König
[not found] ` <1496234326-2581-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
0 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2017-05-31 12:38 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
From: Christian König <christian.koenig@amd.com>
Remove a bunch of misleading variables and move the compute
VM bug checking into the VM code again.
Also fix the coding style of the "if"s.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 32 ------------------------
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 6 -----
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 42 +++++++++++++++++++++++---------
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +++
4 files changed, 34 insertions(+), 49 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 7d95435..31aa51d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -153,36 +153,6 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
}
/**
- * amdgpu_ring_check_compute_vm_bug - check whether this ring has compute vm bug
- *
- * @adev: amdgpu_device pointer
- * @ring: amdgpu_ring structure holding ring information
- */
-static void amdgpu_ring_check_compute_vm_bug(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- const struct amdgpu_ip_block *ip_block;
-
- ring->has_compute_vm_bug = false;
-
- if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
- /* only compute rings */
- return;
-
- ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
- if (!ip_block)
- return;
-
- /* Compute ring has a VM bug for GFX version < 7.
- And compute ring has a VM bug for GFX 8 MEC firmware version < 673.*/
- if (ip_block->version->major <= 7) {
- ring->has_compute_vm_bug = true;
- } else if (ip_block->version->major == 8)
- if (adev->gfx.mec_fw_version < 673)
- ring->has_compute_vm_bug = true;
-}
-
-/**
* amdgpu_ring_init - init driver ring struct.
*
* @adev: amdgpu_device pointer
@@ -288,8 +258,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
DRM_ERROR("Failed to register debugfs file for rings !\n");
}
- amdgpu_ring_check_compute_vm_bug(adev, ring);
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 334307e..a9223a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -185,7 +185,6 @@ struct amdgpu_ring {
u64 cond_exe_gpu_addr;
volatile u32 *cond_exe_cpu_addr;
unsigned vm_inv_eng;
- bool has_compute_vm_bug;
#if defined(CONFIG_DEBUG_FS)
struct dentry *ent;
#endif
@@ -208,9 +207,4 @@ static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
}
-static inline bool amdgpu_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
-{
- return ring->has_compute_vm_bug;
-}
-
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d4d05a8..b5e62bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -663,25 +663,31 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
unsigned vmhub = ring->funcs->vmhub;
struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vm_id *id;
- bool gds_switch_needed;
- bool vm_flush_needed = job->vm_needs_flush ||
- amdgpu_ring_has_compute_vm_bug(ring);
if (job->vm_id == 0)
return false;
+
+ if (job->vm_needs_flush)
+ return true;
+
id = &id_mgr->ids[job->vm_id];
- gds_switch_needed = ring->funcs->emit_gds_switch && (
- id->gds_base != job->gds_base ||
- id->gds_size != job->gds_size ||
- id->gws_base != job->gws_base ||
- id->gws_size != job->gws_size ||
- id->oa_base != job->oa_base ||
- id->oa_size != job->oa_size);
+ if (ring->funcs->emit_gds_switch &&
+ (id->gds_base != job->gds_base ||
+ id->gds_size != job->gds_size ||
+ id->gws_base != job->gws_base ||
+ id->gws_size != job->gws_size ||
+ id->oa_base != job->oa_base ||
+ id->oa_size != job->oa_size))
+ return true;
if (amdgpu_vm_had_gpu_reset(adev, id))
return true;
- return vm_flush_needed || gds_switch_needed;
+ if (adev->vm_manager.has_compute_vm_bug &&
+ ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+ return true;
+
+ return false;
}
/**
@@ -2371,6 +2377,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
*/
void amdgpu_vm_manager_init(struct amdgpu_device *adev)
{
+ const struct amdgpu_ip_block *ip_block;
unsigned i, j;
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
@@ -2398,6 +2405,19 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
atomic64_set(&adev->vm_manager.client_counter, 0);
spin_lock_init(&adev->vm_manager.prt_lock);
atomic_set(&adev->vm_manager.num_prt_users, 0);
+
+
+ /* Compute ring has a VM bug for GFX version < 7.
+ * And compute ring has a VM bug for GFX 8 MEC firmware version < 673.
+ */
+ adev->vm_manager.has_compute_vm_bug = false;
+ ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+ if (ip_block) {
+ if (ip_block->version->major <= 7 ||
+ ((ip_block->version->major == 8) &&
+ (adev->gfx.mec_fw_version < 673)))
+ adev->vm_manager.has_compute_vm_bug = true;
+ }
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 8309bc7..a49c608 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -184,6 +184,8 @@ struct amdgpu_vm_manager {
/* partial resident texture handling */
spinlock_t prt_lock;
atomic_t num_prt_users;
+
+ bool has_compute_vm_bug;
};
void amdgpu_vm_manager_init(struct amdgpu_device *adev);
@@ -243,6 +245,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va);
void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size);
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring);
bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job);
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/4] drm/amdgpu: simplify VM shadow handling
[not found] ` <1496234326-2581-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-05-31 12:38 ` Christian König
[not found] ` <1496234326-2581-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-31 12:38 ` [PATCH 3/4] drm/amdgpu: increase fragmentation size for Vega10 Christian König
2017-05-31 12:38 ` [PATCH 4/4] drm/amdgpu: enable huge page handling in the VM v3 Christian König
2 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2017-05-31 12:38 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
From: Christian König <christian.koenig@amd.com>
Now that we don't join PTE updates any more we don't need to call
the update function twice for this.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 21 +++++++--------------
1 file changed, 7 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b5e62bd..b56e24a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -77,8 +77,6 @@ struct amdgpu_pte_update_params {
void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,
uint64_t addr, unsigned count, uint32_t incr,
uint64_t flags);
- /* indicate update pt or its shadow */
- bool shadow;
};
/* Helper to disable partial resident texture feature from a fence callback */
@@ -1181,20 +1179,20 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
return -EINVAL;
}
- if (params->shadow) {
- if (!pt->shadow)
- return 0;
- pt = pt->shadow;
- }
-
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
else
nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
+ if (pt->shadow) {
+ pe_start = amdgpu_bo_gpu_offset(pt->shadow);
+ pe_start += (addr & mask) * 8;
+ params->func(params, pe_start, dst, nptes,
+ AMDGPU_GPU_PAGE_SIZE, flags);
+ }
+
pe_start = amdgpu_bo_gpu_offset(pt);
pe_start += (addr & mask) * 8;
-
params->func(params, pe_start, dst, nptes,
AMDGPU_GPU_PAGE_SIZE, flags);
@@ -1392,11 +1390,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
if (r)
goto error_free;
- params.shadow = true;
- r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
- if (r)
- goto error_free;
- params.shadow = false;
r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
if (r)
goto error_free;
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/4] drm/amdgpu: increase fragmentation size for Vega10
[not found] ` <1496234326-2581-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-31 12:38 ` [PATCH 2/4] drm/amdgpu: simplify VM shadow handling Christian König
@ 2017-05-31 12:38 ` Christian König
[not found] ` <1496234326-2581-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-31 12:38 ` [PATCH 4/4] drm/amdgpu: enable huge page handling in the VM v3 Christian König
2 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2017-05-31 12:38 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
From: Christian König <christian.koenig@amd.com>
The fragment bits work differently for Vega10 compared to previous generations.
Increase the fragment size to 2MB for now to better handle that.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 5 +++--
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +++--
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +++-
3 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 4dd83a3..36de8a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -551,8 +551,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
- dev_info.pte_fragment_size = (1 << AMDGPU_LOG2_PAGES_PER_FRAG) *
- AMDGPU_GPU_PAGE_SIZE;
+ dev_info.pte_fragment_size =
+ (1 << AMDGPU_LOG2_PAGES_PER_FRAG(adev)) *
+ AMDGPU_GPU_PAGE_SIZE;
dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE;
dev_info.cu_active_number = adev->gfx.cu_info.number;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b56e24a..d07a28c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1239,8 +1239,9 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
*/
/* SI and newer are optimized for 64KB */
- uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
- uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
+ unsigned pages_per_frag = AMDGPU_LOG2_PAGES_PER_FRAG(params->adev);
+ uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
+ uint64_t frag_align = 1 << pages_per_frag;
uint64_t frag_start = ALIGN(start, frag_align);
uint64_t frag_end = end & ~(frag_align - 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index a49c608..aabe815 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -51,7 +51,9 @@ struct amdgpu_bo_list_entry;
#define AMDGPU_VM_PTB_ALIGN_SIZE 32768
/* LOG2 number of continuous pages for the fragment field */
-#define AMDGPU_LOG2_PAGES_PER_FRAG 4
+#define AMDGPU_LOG2_PAGES_PER_FRAG(adev) \
+ ((adev)->asic_type < CHIP_VEGA10 ? 4 : \
+ (adev)->vm_manager.block_size)
#define AMDGPU_PTE_VALID (1ULL << 0)
#define AMDGPU_PTE_SYSTEM (1ULL << 1)
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 4/4] drm/amdgpu: enable huge page handling in the VM v3
[not found] ` <1496234326-2581-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-31 12:38 ` [PATCH 2/4] drm/amdgpu: simplify VM shadow handling Christian König
2017-05-31 12:38 ` [PATCH 3/4] drm/amdgpu: increase fragmentation size for Vega10 Christian König
@ 2017-05-31 12:38 ` Christian König
[not found] ` <1496234326-2581-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2017-05-31 12:38 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
From: Christian König <christian.koenig@amd.com>
The hardware can use huge pages to map 2MB of address space with only one PDE.
v2: few cleanups and rebased
v3: skip PT updates if we are using the PDE
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 103 +++++++++++++++++++++++++++------
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 ++
2 files changed, 88 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d07a28c..4cfef3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -323,6 +323,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
entry->bo = pt;
entry->addr = 0;
+ entry->huge_page = false;
}
if (level < adev->vm_manager.num_level) {
@@ -990,7 +991,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
pt = amdgpu_bo_gpu_offset(bo);
pt = amdgpu_gart_get_vm_pde(adev, pt);
- if (parent->entries[pt_idx].addr == pt)
+ if (parent->entries[pt_idx].addr == pt ||
+ parent->entries[pt_idx].huge_page)
continue;
parent->entries[pt_idx].addr = pt;
@@ -1122,29 +1124,83 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
}
/**
- * amdgpu_vm_find_pt - find the page table for an address
+ * amdgpu_vm_get_entry - find the entry for an address
*
* @p: see amdgpu_pte_update_params definition
* @addr: virtual address in question
+ * @entry: resulting entry or NULL
+ * @parent: parent entry
*
- * Find the page table BO for a virtual address, return NULL when none found.
+ * Find the vm_pt entry and its parent for the given address.
*/
-static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
- uint64_t addr)
+void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
+ struct amdgpu_vm_pt **entry,
+ struct amdgpu_vm_pt **parent)
{
- struct amdgpu_vm_pt *entry = &p->vm->root;
unsigned idx, level = p->adev->vm_manager.num_level;
- while (entry->entries) {
+ *parent = NULL;
+ *entry = &p->vm->root;
+ while ((*entry)->entries) {
idx = addr >> (p->adev->vm_manager.block_size * level--);
- idx %= amdgpu_bo_size(entry->bo) / 8;
- entry = &entry->entries[idx];
+ idx %= amdgpu_bo_size((*entry)->bo) / 8;
+ *parent = *entry;
+ *entry = &(*entry)->entries[idx];
}
if (level)
- return NULL;
+ *entry = NULL;
+}
+
+/**
+ * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
+ *
+ * @p: see amdgpu_pte_update_params definition
+ * @entry: vm_pt entry to check
+ * @parent: parent entry
+ * @nptes: number of PTEs updated with this operation
+ * @dst: destination address where the PTEs should point to
+ * @flags: access flags for the PTEs
+ *
+ * Check if we can update the PD with a huge page.
+ */
+static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
+ struct amdgpu_vm_pt *entry,
+ struct amdgpu_vm_pt *parent,
+ unsigned nptes, uint64_t dst,
+ uint64_t flags)
+{
+ uint64_t pd_addr, pde;
+
+ /* In the case of a mixed PT the PDE must point to it*/
+ if (p->adev->asic_type < CHIP_VEGA10 ||
+ nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
+ p->func != amdgpu_vm_do_set_ptes ||
+ !(flags & AMDGPU_PTE_VALID)) {
+
+ dst = amdgpu_bo_gpu_offset(entry->bo);
+ dst = amdgpu_gart_get_vm_pde(p->adev, dst);
+ flags = AMDGPU_PTE_VALID;
+ } else {
+ flags |= AMDGPU_PDE_PTE;
+ }
- return entry->bo;
+ if (entry->addr == dst &&
+ entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
+ return;
+
+ entry->addr = dst;
+ entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
+
+ if (parent->bo->shadow) {
+ pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
+ pde = pd_addr + (entry - parent->entries) * 8;
+ amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+ }
+
+ pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+ pde = pd_addr + (entry - parent->entries) * 8;
+ amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
}
/**
@@ -1172,18 +1228,25 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
unsigned nptes;
/* walk over the address space and update the page tables */
- for (addr = start; addr < end; addr += nptes) {
- pt = amdgpu_vm_get_pt(params, addr);
- if (!pt) {
- pr_err("PT not found, aborting update_ptes\n");
- return -EINVAL;
- }
+ for (addr = start; addr < end; addr += nptes,
+ dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
+ struct amdgpu_vm_pt *entry, *parent;
+
+ amdgpu_vm_get_entry(params, addr, &entry, &parent);
+ if (!entry)
+ return -ENOENT;
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
else
nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
+ amdgpu_vm_handle_huge_pages(params, entry, parent,
+ nptes, dst, flags);
+ if (entry->huge_page)
+ continue;
+
+ pt = entry->bo;
if (pt->shadow) {
pe_start = amdgpu_bo_gpu_offset(pt->shadow);
pe_start += (addr & mask) * 8;
@@ -1195,8 +1258,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
pe_start += (addr & mask) * 8;
params->func(params, pe_start, dst, nptes,
AMDGPU_GPU_PAGE_SIZE, flags);
-
- dst += nptes * AMDGPU_GPU_PAGE_SIZE;
}
return 0;
@@ -1330,6 +1391,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
/* padding, etc. */
ndw = 64;
+ /* one PDE write for each huge page */
+ ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
+
if (src) {
/* only copy commands needed */
ndw += ncmds * 7;
@@ -1409,6 +1473,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
error_free:
amdgpu_job_free(job);
+ amdgpu_vm_invalidate_level(&vm->root);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index aabe815..79145e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry;
/* TILED for VEGA10, reserved for older ASICs */
#define AMDGPU_PTE_PRT (1ULL << 51)
+/* PDE is handled as PTE for VEGA10 */
+#define AMDGPU_PDE_PTE (1ULL << 54)
+
/* VEGA10 only */
#define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
#define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
@@ -92,6 +95,7 @@ struct amdgpu_bo_list_entry;
struct amdgpu_vm_pt {
struct amdgpu_bo *bo;
uint64_t addr;
+ bool huge_page;
/* array of page tables, one for each directory entry */
struct amdgpu_vm_pt *entries;
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 2/4] drm/amdgpu: simplify VM shadow handling
[not found] ` <1496234326-2581-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-06-01 2:19 ` zhoucm1
0 siblings, 0 replies; 7+ messages in thread
From: zhoucm1 @ 2017-06-01 2:19 UTC (permalink / raw)
To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
On 2017年05月31日 20:38, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Now that we don't join PTE updates any more we don't need to call
> the update function twice for this.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 21 +++++++--------------
> 1 file changed, 7 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b5e62bd..b56e24a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -77,8 +77,6 @@ struct amdgpu_pte_update_params {
> void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,
> uint64_t addr, unsigned count, uint32_t incr,
> uint64_t flags);
> - /* indicate update pt or its shadow */
> - bool shadow;
> };
>
> /* Helper to disable partial resident texture feature from a fence callback */
> @@ -1181,20 +1179,20 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> return -EINVAL;
> }
>
> - if (params->shadow) {
> - if (!pt->shadow)
> - return 0;
> - pt = pt->shadow;
> - }
> -
> if ((addr & ~mask) == (end & ~mask))
> nptes = end - addr;
> else
> nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>
> + if (pt->shadow) {
> + pe_start = amdgpu_bo_gpu_offset(pt->shadow);
> + pe_start += (addr & mask) * 8;
> + params->func(params, pe_start, dst, nptes,
> + AMDGPU_GPU_PAGE_SIZE, flags);
> + }
> +
> pe_start = amdgpu_bo_gpu_offset(pt);
> pe_start += (addr & mask) * 8;
> -
> params->func(params, pe_start, dst, nptes,
> AMDGPU_GPU_PAGE_SIZE, flags);
>
> @@ -1392,11 +1390,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> if (r)
> goto error_free;
>
> - params.shadow = true;
> - r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
> - if (r)
> - goto error_free;
> - params.shadow = false;
> r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
> if (r)
> goto error_free;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 3/4] drm/amdgpu: increase fragmentation size for Vega10
[not found] ` <1496234326-2581-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-06-01 3:04 ` zhoucm1
0 siblings, 0 replies; 7+ messages in thread
From: zhoucm1 @ 2017-06-01 3:04 UTC (permalink / raw)
To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
On 2017年05月31日 20:38, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> The fragment bits work differently for Vega10 compared to previous generations.
>
> Increase the fragment size to 2MB for now to better handle that.
I checked the fragment bits in the PTE and didn't find any difference. Could
you point out what the difference is?
Also, a 2MB fragment doesn't seem like a good option: the buffers people
allocate are often smaller than 2MB, so 64KB seems more effective.
Regards,
David Zhou
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 5 +++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +++-
> 3 files changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 4dd83a3..36de8a5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -551,8 +551,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
> dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
> dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
> dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
> - dev_info.pte_fragment_size = (1 << AMDGPU_LOG2_PAGES_PER_FRAG) *
> - AMDGPU_GPU_PAGE_SIZE;
> + dev_info.pte_fragment_size =
> + (1 << AMDGPU_LOG2_PAGES_PER_FRAG(adev)) *
> + AMDGPU_GPU_PAGE_SIZE;
> dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE;
>
> dev_info.cu_active_number = adev->gfx.cu_info.number;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b56e24a..d07a28c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1239,8 +1239,9 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
> */
>
> /* SI and newer are optimized for 64KB */
> - uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
> - uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
> + unsigned pages_per_frag = AMDGPU_LOG2_PAGES_PER_FRAG(params->adev);
> + uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
> + uint64_t frag_align = 1 << pages_per_frag;
>
> uint64_t frag_start = ALIGN(start, frag_align);
> uint64_t frag_end = end & ~(frag_align - 1);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index a49c608..aabe815 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -51,7 +51,9 @@ struct amdgpu_bo_list_entry;
> #define AMDGPU_VM_PTB_ALIGN_SIZE 32768
>
> /* LOG2 number of continuous pages for the fragment field */
> -#define AMDGPU_LOG2_PAGES_PER_FRAG 4
> +#define AMDGPU_LOG2_PAGES_PER_FRAG(adev) \
> + ((adev)->asic_type < CHIP_VEGA10 ? 4 : \
> + (adev)->vm_manager.block_size)
>
> #define AMDGPU_PTE_VALID (1ULL << 0)
> #define AMDGPU_PTE_SYSTEM (1ULL << 1)
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 4/4] drm/amdgpu: enable huge page handling in the VM v3
[not found] ` <1496234326-2581-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-06-01 3:06 ` zhoucm1
0 siblings, 0 replies; 7+ messages in thread
From: zhoucm1 @ 2017-06-01 3:06 UTC (permalink / raw)
To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
On 2017年05月31日 20:38, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> The hardware can use huge pages to map 2MB of address space with only one PDE.
>
> v2: few cleanups and rebased
> v3: skip PT updates if we are using the PDE
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 103 +++++++++++++++++++++++++++------
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 ++
> 2 files changed, 88 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index d07a28c..4cfef3c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -323,6 +323,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>
> entry->bo = pt;
> entry->addr = 0;
> + entry->huge_page = false;
> }
>
> if (level < adev->vm_manager.num_level) {
> @@ -990,7 +991,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>
> pt = amdgpu_bo_gpu_offset(bo);
> pt = amdgpu_gart_get_vm_pde(adev, pt);
> - if (parent->entries[pt_idx].addr == pt)
> + if (parent->entries[pt_idx].addr == pt ||
> + parent->entries[pt_idx].huge_page)
> continue;
>
> parent->entries[pt_idx].addr = pt;
> @@ -1122,29 +1124,83 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
> }
>
> /**
> - * amdgpu_vm_find_pt - find the page table for an address
> + * amdgpu_vm_get_entry - find the entry for an address
> *
> * @p: see amdgpu_pte_update_params definition
> * @addr: virtual address in question
> + * @entry: resulting entry or NULL
> + * @parent: parent entry
> *
> - * Find the page table BO for a virtual address, return NULL when none found.
> + * Find the vm_pt entry and its parent for the given address.
> */
> -static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
> - uint64_t addr)
> +void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
> + struct amdgpu_vm_pt **entry,
> + struct amdgpu_vm_pt **parent)
> {
> - struct amdgpu_vm_pt *entry = &p->vm->root;
> unsigned idx, level = p->adev->vm_manager.num_level;
>
> - while (entry->entries) {
> + *parent = NULL;
> + *entry = &p->vm->root;
> + while ((*entry)->entries) {
> idx = addr >> (p->adev->vm_manager.block_size * level--);
> - idx %= amdgpu_bo_size(entry->bo) / 8;
> - entry = &entry->entries[idx];
> + idx %= amdgpu_bo_size((*entry)->bo) / 8;
> + *parent = *entry;
> + *entry = &(*entry)->entries[idx];
> }
>
> if (level)
> - return NULL;
> + *entry = NULL;
> +}
> +
> +/**
> + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
> + *
> + * @p: see amdgpu_pte_update_params definition
> + * @entry: vm_pt entry to check
> + * @parent: parent entry
> + * @nptes: number of PTEs updated with this operation
> + * @dst: destination address where the PTEs should point to
> + * @flags: access flags for the PTEs
> + *
> + * Check if we can update the PD with a huge page.
> + */
> +static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
> + struct amdgpu_vm_pt *entry,
> + struct amdgpu_vm_pt *parent,
> + unsigned nptes, uint64_t dst,
> + uint64_t flags)
> +{
> + uint64_t pd_addr, pde;
> +
> + /* In the case of a mixed PT the PDE must point to it*/
> + if (p->adev->asic_type < CHIP_VEGA10 ||
> + nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
> + p->func != amdgpu_vm_do_set_ptes ||
> + !(flags & AMDGPU_PTE_VALID)) {
> +
> + dst = amdgpu_bo_gpu_offset(entry->bo);
> + dst = amdgpu_gart_get_vm_pde(p->adev, dst);
> + flags = AMDGPU_PTE_VALID;
> + } else {
> + flags |= AMDGPU_PDE_PTE;
> + }
>
> - return entry->bo;
> + if (entry->addr == dst &&
> + entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
> + return;
> +
> + entry->addr = dst;
> + entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
> +
> + if (parent->bo->shadow) {
> + pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
> + pde = pd_addr + (entry - parent->entries) * 8;
> + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
> + }
> +
> + pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> + pde = pd_addr + (entry - parent->entries) * 8;
> + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
> }
>
> /**
> @@ -1172,18 +1228,25 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> unsigned nptes;
>
> /* walk over the address space and update the page tables */
> - for (addr = start; addr < end; addr += nptes) {
> - pt = amdgpu_vm_get_pt(params, addr);
> - if (!pt) {
> - pr_err("PT not found, aborting update_ptes\n");
> - return -EINVAL;
> - }
> + for (addr = start; addr < end; addr += nptes,
> + dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
> + struct amdgpu_vm_pt *entry, *parent;
> +
> + amdgpu_vm_get_entry(params, addr, &entry, &parent);
> + if (!entry)
> + return -ENOENT;
>
> if ((addr & ~mask) == (end & ~mask))
> nptes = end - addr;
> else
> nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>
> + amdgpu_vm_handle_huge_pages(params, entry, parent,
> + nptes, dst, flags);
> + if (entry->huge_page)
> + continue;
> +
> + pt = entry->bo;
> if (pt->shadow) {
> pe_start = amdgpu_bo_gpu_offset(pt->shadow);
> pe_start += (addr & mask) * 8;
> @@ -1195,8 +1258,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> pe_start += (addr & mask) * 8;
> params->func(params, pe_start, dst, nptes,
> AMDGPU_GPU_PAGE_SIZE, flags);
> -
> - dst += nptes * AMDGPU_GPU_PAGE_SIZE;
> }
>
> return 0;
> @@ -1330,6 +1391,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> /* padding, etc. */
> ndw = 64;
>
> + /* one PDE write for each huge page */
> + ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
> +
> if (src) {
> /* only copy commands needed */
> ndw += ncmds * 7;
> @@ -1409,6 +1473,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>
> error_free:
> amdgpu_job_free(job);
> + amdgpu_vm_invalidate_level(&vm->root);
> return r;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index aabe815..79145e6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry;
> /* TILED for VEGA10, reserved for older ASICs */
> #define AMDGPU_PTE_PRT (1ULL << 51)
>
> +/* PDE is handled as PTE for VEGA10 */
> +#define AMDGPU_PDE_PTE (1ULL << 54)
> +
> /* VEGA10 only */
> #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
> #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
> @@ -92,6 +95,7 @@ struct amdgpu_bo_list_entry;
> struct amdgpu_vm_pt {
> struct amdgpu_bo *bo;
> uint64_t addr;
> + bool huge_page;
>
> /* array of page tables, one for each directory entry */
> struct amdgpu_vm_pt *entries;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2017-06-01 3:06 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-05-31 12:38 [PATCH 1/4] drm/amdgpu: further cleanup amdgpu_vm_need_pipeline_sync Christian König
[not found] ` <1496234326-2581-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-31 12:38 ` [PATCH 2/4] drm/amdgpu: simplify VM shadow handling Christian König
[not found] ` <1496234326-2581-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-06-01 2:19 ` zhoucm1
2017-05-31 12:38 ` [PATCH 3/4] drm/amdgpu: increase fragmentation size for Vega10 Christian König
[not found] ` <1496234326-2581-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-06-01 3:04 ` zhoucm1
2017-05-31 12:38 ` [PATCH 4/4] drm/amdgpu: enable huge page handling in the VM v3 Christian König
[not found] ` <1496234326-2581-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-06-01 3:06 ` zhoucm1
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.