* [PATCH 1/4] drm/amdgpu: further cleanup amdgpu_vm_need_pipeline_sync
@ 2017-05-31 12:38 Christian König
[not found] ` <1496234326-2581-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
0 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2017-05-31 12:38 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
From: Christian König <christian.koenig@amd.com>
Remove a bunch of misleading variables and move the compute
VM bug checking into the VM code again.
Also fix the coding style of the "if"s.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 32 ------------------------
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 6 -----
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 42 +++++++++++++++++++++++---------
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +++
4 files changed, 34 insertions(+), 49 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 7d95435..31aa51d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -153,36 +153,6 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
}
/**
- * amdgpu_ring_check_compute_vm_bug - check whether this ring has compute vm bug
- *
- * @adev: amdgpu_device pointer
- * @ring: amdgpu_ring structure holding ring information
- */
-static void amdgpu_ring_check_compute_vm_bug(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- const struct amdgpu_ip_block *ip_block;
-
- ring->has_compute_vm_bug = false;
-
- if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
- /* only compute rings */
- return;
-
- ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
- if (!ip_block)
- return;
-
- /* Compute ring has a VM bug for GFX version < 7.
- And compute ring has a VM bug for GFX 8 MEC firmware version < 673.*/
- if (ip_block->version->major <= 7) {
- ring->has_compute_vm_bug = true;
- } else if (ip_block->version->major == 8)
- if (adev->gfx.mec_fw_version < 673)
- ring->has_compute_vm_bug = true;
-}
-
-/**
* amdgpu_ring_init - init driver ring struct.
*
* @adev: amdgpu_device pointer
@@ -288,8 +258,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
DRM_ERROR("Failed to register debugfs file for rings !\n");
}
- amdgpu_ring_check_compute_vm_bug(adev, ring);
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 334307e..a9223a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -185,7 +185,6 @@ struct amdgpu_ring {
u64 cond_exe_gpu_addr;
volatile u32 *cond_exe_cpu_addr;
unsigned vm_inv_eng;
- bool has_compute_vm_bug;
#if defined(CONFIG_DEBUG_FS)
struct dentry *ent;
#endif
@@ -208,9 +207,4 @@ static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
}
-static inline bool amdgpu_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
-{
- return ring->has_compute_vm_bug;
-}
-
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d4d05a8..b5e62bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -663,25 +663,31 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
unsigned vmhub = ring->funcs->vmhub;
struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vm_id *id;
- bool gds_switch_needed;
- bool vm_flush_needed = job->vm_needs_flush ||
- amdgpu_ring_has_compute_vm_bug(ring);
if (job->vm_id == 0)
return false;
+
+ if (job->vm_needs_flush)
+ return true;
+
id = &id_mgr->ids[job->vm_id];
- gds_switch_needed = ring->funcs->emit_gds_switch && (
- id->gds_base != job->gds_base ||
- id->gds_size != job->gds_size ||
- id->gws_base != job->gws_base ||
- id->gws_size != job->gws_size ||
- id->oa_base != job->oa_base ||
- id->oa_size != job->oa_size);
+ if (ring->funcs->emit_gds_switch &&
+ (id->gds_base != job->gds_base ||
+ id->gds_size != job->gds_size ||
+ id->gws_base != job->gws_base ||
+ id->gws_size != job->gws_size ||
+ id->oa_base != job->oa_base ||
+ id->oa_size != job->oa_size))
+ return true;
if (amdgpu_vm_had_gpu_reset(adev, id))
return true;
- return vm_flush_needed || gds_switch_needed;
+ if (adev->vm_manager.has_compute_vm_bug &&
+ ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+ return true;
+
+ return false;
}
/**
@@ -2371,6 +2377,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
*/
void amdgpu_vm_manager_init(struct amdgpu_device *adev)
{
+ const struct amdgpu_ip_block *ip_block;
unsigned i, j;
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
@@ -2398,6 +2405,19 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
atomic64_set(&adev->vm_manager.client_counter, 0);
spin_lock_init(&adev->vm_manager.prt_lock);
atomic_set(&adev->vm_manager.num_prt_users, 0);
+
+
+ /* Compute ring has a VM bug for GFX version <= 7.
+ * And compute ring has a VM bug for GFX 8 MEC firmware version < 673.
+ */
+ adev->vm_manager.has_compute_vm_bug = false;
+ ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+ if (ip_block) {
+ if (ip_block->version->major <= 7 ||
+ ((ip_block->version->major == 8) &&
+ (adev->gfx.mec_fw_version < 673)))
+ adev->vm_manager.has_compute_vm_bug = true;
+ }
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 8309bc7..a49c608 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -184,6 +184,8 @@ struct amdgpu_vm_manager {
/* partial resident texture handling */
spinlock_t prt_lock;
atomic_t num_prt_users;
+
+ bool has_compute_vm_bug;
};
void amdgpu_vm_manager_init(struct amdgpu_device *adev);
@@ -243,6 +245,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va);
void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size);
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring);
bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job);
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 7+ messages in thread[parent not found: <1496234326-2581-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>]
* [PATCH 2/4] drm/amdgpu: simplify VM shadow handling [not found] ` <1496234326-2581-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> @ 2017-05-31 12:38 ` Christian König [not found] ` <1496234326-2581-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2017-05-31 12:38 ` [PATCH 3/4] drm/amdgpu: increase fragmentation size for Vega10 Christian König 2017-05-31 12:38 ` [PATCH 4/4] drm/amdgpu: enable huge page handling in the VM v3 Christian König 2 siblings, 1 reply; 7+ messages in thread From: Christian König @ 2017-05-31 12:38 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW From: Christian König <christian.koenig@amd.com> Now that we don't join PTE updates any more we don't need to call the update function twice for this. Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b5e62bd..b56e24a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -77,8 +77,6 @@ struct amdgpu_pte_update_params { void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags); - /* indicate update pt or its shadow */ - bool shadow; }; /* Helper to disable partial resident texture feature from a fence callback */ @@ -1181,20 +1179,20 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, return -EINVAL; } - if (params->shadow) { - if (!pt->shadow) - return 0; - pt = pt->shadow; - } - if ((addr & ~mask) == (end & ~mask)) nptes = end - addr; else nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); + if (pt->shadow) { + pe_start = amdgpu_bo_gpu_offset(pt->shadow); + pe_start += (addr & mask) * 8; + params->func(params, pe_start, dst, nptes, + AMDGPU_GPU_PAGE_SIZE, flags); + } + 
pe_start = amdgpu_bo_gpu_offset(pt); pe_start += (addr & mask) * 8; - params->func(params, pe_start, dst, nptes, AMDGPU_GPU_PAGE_SIZE, flags); @@ -1392,11 +1390,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - params.shadow = true; - r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); - if (r) - goto error_free; - params.shadow = false; r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); if (r) goto error_free; -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 7+ messages in thread
[parent not found: <1496234326-2581-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>]
* Re: [PATCH 2/4] drm/amdgpu: simplify VM shadow handling [not found] ` <1496234326-2581-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> @ 2017-06-01 2:19 ` zhoucm1 0 siblings, 0 replies; 7+ messages in thread From: zhoucm1 @ 2017-06-01 2:19 UTC (permalink / raw) To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW On 2017年05月31日 20:38, Christian König wrote: > From: Christian König <christian.koenig@amd.com> > > Now that we don't join PTE updates any more we don't need to call > the update function twice for this. > > Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Chunming Zhou <david1.zhou@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 21 +++++++-------------- > 1 file changed, 7 insertions(+), 14 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index b5e62bd..b56e24a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -77,8 +77,6 @@ struct amdgpu_pte_update_params { > void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, > uint64_t addr, unsigned count, uint32_t incr, > uint64_t flags); > - /* indicate update pt or its shadow */ > - bool shadow; > }; > > /* Helper to disable partial resident texture feature from a fence callback */ > @@ -1181,20 +1179,20 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, > return -EINVAL; > } > > - if (params->shadow) { > - if (!pt->shadow) > - return 0; > - pt = pt->shadow; > - } > - > if ((addr & ~mask) == (end & ~mask)) > nptes = end - addr; > else > nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); > > + if (pt->shadow) { > + pe_start = amdgpu_bo_gpu_offset(pt->shadow); > + pe_start += (addr & mask) * 8; > + params->func(params, pe_start, dst, nptes, > + AMDGPU_GPU_PAGE_SIZE, flags); > + } > + > pe_start = amdgpu_bo_gpu_offset(pt); > pe_start += (addr & mask) * 8; > - > params->func(params, pe_start, dst, 
nptes, > AMDGPU_GPU_PAGE_SIZE, flags); > > @@ -1392,11 +1390,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, > if (r) > goto error_free; > > - params.shadow = true; > - r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); > - if (r) > - goto error_free; > - params.shadow = false; > r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); > if (r) > goto error_free; _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 3/4] drm/amdgpu: increase fragmentation size for Vega10 [not found] ` <1496234326-2581-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2017-05-31 12:38 ` [PATCH 2/4] drm/amdgpu: simplify VM shadow handling Christian König @ 2017-05-31 12:38 ` Christian König [not found] ` <1496234326-2581-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2017-05-31 12:38 ` [PATCH 4/4] drm/amdgpu: enable huge page handling in the VM v3 Christian König 2 siblings, 1 reply; 7+ messages in thread From: Christian König @ 2017-05-31 12:38 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW From: Christian König <christian.koenig@amd.com> The fragment bits work differently for Vega10 compared to previous generations. Increase the fragment size to 2MB for now to better handle that. Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 4dd83a3..36de8a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -551,8 +551,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); - dev_info.pte_fragment_size = (1 << AMDGPU_LOG2_PAGES_PER_FRAG) * - AMDGPU_GPU_PAGE_SIZE; + dev_info.pte_fragment_size = + (1 << AMDGPU_LOG2_PAGES_PER_FRAG(adev)) * + AMDGPU_GPU_PAGE_SIZE; dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; dev_info.cu_active_number = adev->gfx.cu_info.number; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b56e24a..d07a28c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1239,8 +1239,9 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, */ /* SI and newer are optimized for 64KB */ - uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG); - uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG; + unsigned pages_per_frag = AMDGPU_LOG2_PAGES_PER_FRAG(params->adev); + uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); + uint64_t frag_align = 1 << pages_per_frag; uint64_t frag_start = ALIGN(start, frag_align); uint64_t frag_end = end & ~(frag_align - 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index a49c608..aabe815 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -51,7 +51,9 @@ struct amdgpu_bo_list_entry; #define AMDGPU_VM_PTB_ALIGN_SIZE 32768 /* LOG2 number of continuous pages for the fragment field */ -#define AMDGPU_LOG2_PAGES_PER_FRAG 4 +#define AMDGPU_LOG2_PAGES_PER_FRAG(adev) \ + ((adev)->asic_type < CHIP_VEGA10 ? 4 : \ + (adev)->vm_manager.block_size) #define AMDGPU_PTE_VALID (1ULL << 0) #define AMDGPU_PTE_SYSTEM (1ULL << 1) -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 7+ messages in thread
[parent not found: <1496234326-2581-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>]
* Re: [PATCH 3/4] drm/amdgpu: increase fragmentation size for Vega10 [not found] ` <1496234326-2581-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> @ 2017-06-01 3:04 ` zhoucm1 0 siblings, 0 replies; 7+ messages in thread From: zhoucm1 @ 2017-06-01 3:04 UTC (permalink / raw) To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW On 2017年05月31日 20:38, Christian König wrote: > From: Christian König <christian.koenig@amd.com> > > The fragment bits work differently for Vega10 compared to previous generations. > > Increase the fragment size to 2MB for now to better handle that. I checked the fragment bits in PTE, don't find difference, Could you point what difference is? And 2MB fragment seems not a good option, the buffer people allocated may be often smaller than 2MB, so 64KB seems be more effective. Regards, David Zhou > > Signed-off-by: Christian König <christian.koenig@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 5 +++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +++- > 3 files changed, 9 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > index 4dd83a3..36de8a5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > @@ -551,8 +551,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file > dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; > dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; > dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); > - dev_info.pte_fragment_size = (1 << AMDGPU_LOG2_PAGES_PER_FRAG) * > - AMDGPU_GPU_PAGE_SIZE; > + dev_info.pte_fragment_size = > + (1 << AMDGPU_LOG2_PAGES_PER_FRAG(adev)) * > + AMDGPU_GPU_PAGE_SIZE; > dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; > > dev_info.cu_active_number = 
adev->gfx.cu_info.number; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index b56e24a..d07a28c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -1239,8 +1239,9 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, > */ > > /* SI and newer are optimized for 64KB */ > - uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG); > - uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG; > + unsigned pages_per_frag = AMDGPU_LOG2_PAGES_PER_FRAG(params->adev); > + uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); > + uint64_t frag_align = 1 << pages_per_frag; > > uint64_t frag_start = ALIGN(start, frag_align); > uint64_t frag_end = end & ~(frag_align - 1); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > index a49c608..aabe815 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > @@ -51,7 +51,9 @@ struct amdgpu_bo_list_entry; > #define AMDGPU_VM_PTB_ALIGN_SIZE 32768 > > /* LOG2 number of continuous pages for the fragment field */ > -#define AMDGPU_LOG2_PAGES_PER_FRAG 4 > +#define AMDGPU_LOG2_PAGES_PER_FRAG(adev) \ > + ((adev)->asic_type < CHIP_VEGA10 ? 4 : \ > + (adev)->vm_manager.block_size) > > #define AMDGPU_PTE_VALID (1ULL << 0) > #define AMDGPU_PTE_SYSTEM (1ULL << 1) _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 4/4] drm/amdgpu: enable huge page handling in the VM v3 [not found] ` <1496234326-2581-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2017-05-31 12:38 ` [PATCH 2/4] drm/amdgpu: simplify VM shadow handling Christian König 2017-05-31 12:38 ` [PATCH 3/4] drm/amdgpu: increase fragmentation size for Vega10 Christian König @ 2017-05-31 12:38 ` Christian König [not found] ` <1496234326-2581-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2 siblings, 1 reply; 7+ messages in thread From: Christian König @ 2017-05-31 12:38 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW From: Christian König <christian.koenig@amd.com> The hardware can use huge pages to map 2MB of address space with only one PDE. v2: few cleanups and rebased v3: skip PT updates if we are using the PDE Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 103 +++++++++++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 ++ 2 files changed, 88 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d07a28c..4cfef3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -323,6 +323,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, entry->bo = pt; entry->addr = 0; + entry->huge_page = false; } if (level < adev->vm_manager.num_level) { @@ -990,7 +991,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, pt = amdgpu_bo_gpu_offset(bo); pt = amdgpu_gart_get_vm_pde(adev, pt); - if (parent->entries[pt_idx].addr == pt) + if (parent->entries[pt_idx].addr == pt || + parent->entries[pt_idx].huge_page) continue; parent->entries[pt_idx].addr = pt; @@ -1122,29 +1124,83 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, } /** - * amdgpu_vm_find_pt - find the page table for an address + * amdgpu_vm_find_entry - find the entry for an 
address * * @p: see amdgpu_pte_update_params definition * @addr: virtual address in question + * @entry: resulting entry or NULL + * @parent: parent entry * - * Find the page table BO for a virtual address, return NULL when none found. + * Find the vm_pt entry and its parent for the given address. */ -static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p, - uint64_t addr) +void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, + struct amdgpu_vm_pt **entry, + struct amdgpu_vm_pt **parent) { - struct amdgpu_vm_pt *entry = &p->vm->root; unsigned idx, level = p->adev->vm_manager.num_level; - while (entry->entries) { + *parent = NULL; + *entry = &p->vm->root; + while ((*entry)->entries) { idx = addr >> (p->adev->vm_manager.block_size * level--); - idx %= amdgpu_bo_size(entry->bo) / 8; - entry = &entry->entries[idx]; + idx %= amdgpu_bo_size((*entry)->bo) / 8; + *parent = *entry; + *entry = &(*entry)->entries[idx]; } if (level) - return NULL; + *entry = NULL; +} + +/** + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages + * + * @p: see amdgpu_pte_update_params definition + * @entry: vm_pt entry to check + * @parent: parent entry + * @nptes: number of PTEs updated with this operation + * @dst: destination address where the PTEs should point to + * @flags: access flags for the PTEs + * + * Check if we can update the PD with a huge page. 
+ */ +static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, + struct amdgpu_vm_pt *entry, + struct amdgpu_vm_pt *parent, + unsigned nptes, uint64_t dst, + uint64_t flags) +{ + uint64_t pd_addr, pde; + + /* In the case of a mixed PT the PDE must point to it*/ + if (p->adev->asic_type < CHIP_VEGA10 || + nptes != AMDGPU_VM_PTE_COUNT(p->adev) || + p->func != amdgpu_vm_do_set_ptes || + !(flags & AMDGPU_PTE_VALID)) { + + dst = amdgpu_bo_gpu_offset(entry->bo); + dst = amdgpu_gart_get_vm_pde(p->adev, dst); + flags = AMDGPU_PTE_VALID; + } else { + flags |= AMDGPU_PDE_PTE; + } - return entry->bo; + if (entry->addr == dst && + entry->huge_page == !!(flags & AMDGPU_PDE_PTE)) + return; + + entry->addr = dst; + entry->huge_page = !!(flags & AMDGPU_PDE_PTE); + + if (parent->bo->shadow) { + pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); + pde = pd_addr + (entry - parent->entries) * 8; + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); + } + + pd_addr = amdgpu_bo_gpu_offset(parent->bo); + pde = pd_addr + (entry - parent->entries) * 8; + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); } /** @@ -1172,18 +1228,25 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, unsigned nptes; /* walk over the address space and update the page tables */ - for (addr = start; addr < end; addr += nptes) { - pt = amdgpu_vm_get_pt(params, addr); - if (!pt) { - pr_err("PT not found, aborting update_ptes\n"); - return -EINVAL; - } + for (addr = start; addr < end; addr += nptes, + dst += nptes * AMDGPU_GPU_PAGE_SIZE) { + struct amdgpu_vm_pt *entry, *parent; + + amdgpu_vm_get_entry(params, addr, &entry, &parent); + if (!entry) + return -ENOENT; if ((addr & ~mask) == (end & ~mask)) nptes = end - addr; else nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); + amdgpu_vm_handle_huge_pages(params, entry, parent, + nptes, dst, flags); + if (entry->huge_page) + continue; + + pt = entry->bo; if (pt->shadow) { pe_start = amdgpu_bo_gpu_offset(pt->shadow); pe_start 
+= (addr & mask) * 8; @@ -1195,8 +1258,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, pe_start += (addr & mask) * 8; params->func(params, pe_start, dst, nptes, AMDGPU_GPU_PAGE_SIZE, flags); - - dst += nptes * AMDGPU_GPU_PAGE_SIZE; } return 0; @@ -1330,6 +1391,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, /* padding, etc. */ ndw = 64; + /* one PDE write for each huge page */ + ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7; + if (src) { /* only copy commands needed */ ndw += ncmds * 7; @@ -1409,6 +1473,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, error_free: amdgpu_job_free(job); + amdgpu_vm_invalidate_level(&vm->root); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index aabe815..79145e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry; /* TILED for VEGA10, reserved for older ASICs */ #define AMDGPU_PTE_PRT (1ULL << 51) +/* PDE is handled as PTE for VEGA10 */ +#define AMDGPU_PDE_PTE (1ULL << 54) + /* VEGA10 only */ #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) @@ -92,6 +95,7 @@ struct amdgpu_bo_list_entry; struct amdgpu_vm_pt { struct amdgpu_bo *bo; uint64_t addr; + bool huge_page; /* array of page tables, one for each directory entry */ struct amdgpu_vm_pt *entries; -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 7+ messages in thread
[parent not found: <1496234326-2581-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>]
* Re: [PATCH 4/4] drm/amdgpu: enable huge page handling in the VM v3 [not found] ` <1496234326-2581-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> @ 2017-06-01 3:06 ` zhoucm1 0 siblings, 0 replies; 7+ messages in thread From: zhoucm1 @ 2017-06-01 3:06 UTC (permalink / raw) To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW On 2017年05月31日 20:38, Christian König wrote: > From: Christian König <christian.koenig@amd.com> > > The hardware can use huge pages to map 2MB of address space with only one PDE. > > v2: few cleanups and rebased > v3: skip PT updates if we are using the PDE > > Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Chunming Zhou <david1.zhou@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 103 +++++++++++++++++++++++++++------ > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 ++ > 2 files changed, 88 insertions(+), 19 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index d07a28c..4cfef3c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -323,6 +323,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, > > entry->bo = pt; > entry->addr = 0; > + entry->huge_page = false; > } > > if (level < adev->vm_manager.num_level) { > @@ -990,7 +991,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, > > pt = amdgpu_bo_gpu_offset(bo); > pt = amdgpu_gart_get_vm_pde(adev, pt); > - if (parent->entries[pt_idx].addr == pt) > + if (parent->entries[pt_idx].addr == pt || > + parent->entries[pt_idx].huge_page) > continue; > > parent->entries[pt_idx].addr = pt; > @@ -1122,29 +1124,83 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, > } > > /** > - * amdgpu_vm_find_pt - find the page table for an address > + * amdgpu_vm_find_entry - find the entry for an address > * > * @p: see amdgpu_pte_update_params definition > * @addr: virtual address in question > + * 
@entry: resulting entry or NULL > + * @parent: parent entry > * > - * Find the page table BO for a virtual address, return NULL when none found. > + * Find the vm_pt entry and it's parent for the given address. > */ > -static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p, > - uint64_t addr) > +void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, > + struct amdgpu_vm_pt **entry, > + struct amdgpu_vm_pt **parent) > { > - struct amdgpu_vm_pt *entry = &p->vm->root; > unsigned idx, level = p->adev->vm_manager.num_level; > > - while (entry->entries) { > + *parent = NULL; > + *entry = &p->vm->root; > + while ((*entry)->entries) { > idx = addr >> (p->adev->vm_manager.block_size * level--); > - idx %= amdgpu_bo_size(entry->bo) / 8; > - entry = &entry->entries[idx]; > + idx %= amdgpu_bo_size((*entry)->bo) / 8; > + *parent = *entry; > + *entry = &(*entry)->entries[idx]; > } > > if (level) > - return NULL; > + *entry = NULL; > +} > + > +/** > + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages > + * > + * @p: see amdgpu_pte_update_params definition > + * @entry: vm_pt entry to check > + * @parent: parent entry > + * @nptes: number of PTEs updated with this operation > + * @dst: destination address where the PTEs should point to > + * @flags: access flags fro the PTEs > + * > + * Check if we can update the PD with a huge page. 
> + */ > +static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, > + struct amdgpu_vm_pt *entry, > + struct amdgpu_vm_pt *parent, > + unsigned nptes, uint64_t dst, > + uint64_t flags) > +{ > + uint64_t pd_addr, pde; > + > + /* In the case of a mixed PT the PDE must point to it*/ > + if (p->adev->asic_type < CHIP_VEGA10 || > + nptes != AMDGPU_VM_PTE_COUNT(p->adev) || > + p->func != amdgpu_vm_do_set_ptes || > + !(flags & AMDGPU_PTE_VALID)) { > + > + dst = amdgpu_bo_gpu_offset(entry->bo); > + dst = amdgpu_gart_get_vm_pde(p->adev, dst); > + flags = AMDGPU_PTE_VALID; > + } else { > + flags |= AMDGPU_PDE_PTE; > + } > > - return entry->bo; > + if (entry->addr == dst && > + entry->huge_page == !!(flags & AMDGPU_PDE_PTE)) > + return; > + > + entry->addr = dst; > + entry->huge_page = !!(flags & AMDGPU_PDE_PTE); > + > + if (parent->bo->shadow) { > + pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); > + pde = pd_addr + (entry - parent->entries) * 8; > + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); > + } > + > + pd_addr = amdgpu_bo_gpu_offset(parent->bo); > + pde = pd_addr + (entry - parent->entries) * 8; > + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); > } > > /** > @@ -1172,18 +1228,25 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, > unsigned nptes; > > /* walk over the address space and update the page tables */ > - for (addr = start; addr < end; addr += nptes) { > - pt = amdgpu_vm_get_pt(params, addr); > - if (!pt) { > - pr_err("PT not found, aborting update_ptes\n"); > - return -EINVAL; > - } > + for (addr = start; addr < end; addr += nptes, > + dst += nptes * AMDGPU_GPU_PAGE_SIZE) { > + struct amdgpu_vm_pt *entry, *parent; > + > + amdgpu_vm_get_entry(params, addr, &entry, &parent); > + if (!entry) > + return -ENOENT; > > if ((addr & ~mask) == (end & ~mask)) > nptes = end - addr; > else > nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); > > + amdgpu_vm_handle_huge_pages(params, entry, parent, > + nptes, dst, 
flags); > + if (entry->huge_page) > + continue; > + > + pt = entry->bo; > if (pt->shadow) { > pe_start = amdgpu_bo_gpu_offset(pt->shadow); > pe_start += (addr & mask) * 8; > @@ -1195,8 +1258,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, > pe_start += (addr & mask) * 8; > params->func(params, pe_start, dst, nptes, > AMDGPU_GPU_PAGE_SIZE, flags); > - > - dst += nptes * AMDGPU_GPU_PAGE_SIZE; > } > > return 0; > @@ -1330,6 +1391,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, > /* padding, etc. */ > ndw = 64; > > + /* one PDE write for each huge page */ > + ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7; > + > if (src) { > /* only copy commands needed */ > ndw += ncmds * 7; > @@ -1409,6 +1473,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, > > error_free: > amdgpu_job_free(job); > + amdgpu_vm_invalidate_level(&vm->root); > return r; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > index aabe815..79145e6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > @@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry; > /* TILED for VEGA10, reserved for older ASICs */ > #define AMDGPU_PTE_PRT (1ULL << 51) > > +/* PDE is handled as PTE for VEGA10 */ > +#define AMDGPU_PDE_PTE (1ULL << 54) > + > /* VEGA10 only */ > #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) > #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) > @@ -92,6 +95,7 @@ struct amdgpu_bo_list_entry; > struct amdgpu_vm_pt { > struct amdgpu_bo *bo; > uint64_t addr; > + bool huge_page; > > /* array of page tables, one for each directory entry */ > struct amdgpu_vm_pt *entries; _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2017-06-01 3:06 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-05-31 12:38 [PATCH 1/4] drm/amdgpu: further cleanup amdgpu_vm_need_pipeline_sync Christian König
[not found] ` <1496234326-2581-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-31 12:38 ` [PATCH 2/4] drm/amdgpu: simplify VM shadow handling Christian König
[not found] ` <1496234326-2581-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-06-01 2:19 ` zhoucm1
2017-05-31 12:38 ` [PATCH 3/4] drm/amdgpu: increase fragmentation size for Vega10 Christian König
[not found] ` <1496234326-2581-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-06-01 3:04 ` zhoucm1
2017-05-31 12:38 ` [PATCH 4/4] drm/amdgpu: enable huge page handling in the VM v3 Christian König
[not found] ` <1496234326-2581-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-06-01 3:06 ` zhoucm1
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.