From: zhoucm1 <david1.zhou-5C7GfCeVMHo@public.gmane.org>
To: "Christian König"
<deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>,
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: Re: [PATCH 4/4] drm/amdgpu: enable huge page handling in the VM v3
Date: Thu, 1 Jun 2017 11:06:54 +0800 [thread overview]
Message-ID: <592F84CE.4030709@amd.com> (raw)
In-Reply-To: <1496234326-2581-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
On 2017年05月31日 20:38, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> The hardware can use huge pages to map 2MB of address space with only one PDE.
>
> v2: few cleanups and rebased
> v3: skip PT updates if we are using the PDE
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 103 +++++++++++++++++++++++++++------
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 ++
> 2 files changed, 88 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index d07a28c..4cfef3c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -323,6 +323,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>
> entry->bo = pt;
> entry->addr = 0;
> + entry->huge_page = false;
> }
>
> if (level < adev->vm_manager.num_level) {
> @@ -990,7 +991,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>
> pt = amdgpu_bo_gpu_offset(bo);
> pt = amdgpu_gart_get_vm_pde(adev, pt);
> - if (parent->entries[pt_idx].addr == pt)
> + if (parent->entries[pt_idx].addr == pt ||
> + parent->entries[pt_idx].huge_page)
> continue;
>
> parent->entries[pt_idx].addr = pt;
> @@ -1122,29 +1124,83 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
> }
>
> /**
> - * amdgpu_vm_find_pt - find the page table for an address
> + * amdgpu_vm_get_entry - find the entry for an address
> *
> * @p: see amdgpu_pte_update_params definition
> * @addr: virtual address in question
> + * @entry: resulting entry or NULL
> + * @parent: parent entry
> *
> - * Find the page table BO for a virtual address, return NULL when none found.
> + * Find the vm_pt entry and its parent for the given address.
> */
> -static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
> - uint64_t addr)
> +void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
> + struct amdgpu_vm_pt **entry,
> + struct amdgpu_vm_pt **parent)
> {
> - struct amdgpu_vm_pt *entry = &p->vm->root;
> unsigned idx, level = p->adev->vm_manager.num_level;
>
> - while (entry->entries) {
> + *parent = NULL;
> + *entry = &p->vm->root;
> + while ((*entry)->entries) {
> idx = addr >> (p->adev->vm_manager.block_size * level--);
> - idx %= amdgpu_bo_size(entry->bo) / 8;
> - entry = &entry->entries[idx];
> + idx %= amdgpu_bo_size((*entry)->bo) / 8;
> + *parent = *entry;
> + *entry = &(*entry)->entries[idx];
> }
>
> if (level)
> - return NULL;
> + *entry = NULL;
> +}
> +
> +/**
> + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
> + *
> + * @p: see amdgpu_pte_update_params definition
> + * @entry: vm_pt entry to check
> + * @parent: parent entry
> + * @nptes: number of PTEs updated with this operation
> + * @dst: destination address where the PTEs should point to
> + * @flags: access flags for the PTEs
> + *
> + * Check if we can update the PD with a huge page.
> + */
> +static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
> + struct amdgpu_vm_pt *entry,
> + struct amdgpu_vm_pt *parent,
> + unsigned nptes, uint64_t dst,
> + uint64_t flags)
> +{
> + uint64_t pd_addr, pde;
> +
> + /* In the case of a mixed PT the PDE must point to it */
> + if (p->adev->asic_type < CHIP_VEGA10 ||
> + nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
> + p->func != amdgpu_vm_do_set_ptes ||
> + !(flags & AMDGPU_PTE_VALID)) {
> +
> + dst = amdgpu_bo_gpu_offset(entry->bo);
> + dst = amdgpu_gart_get_vm_pde(p->adev, dst);
> + flags = AMDGPU_PTE_VALID;
> + } else {
> + flags |= AMDGPU_PDE_PTE;
> + }
>
> - return entry->bo;
> + if (entry->addr == dst &&
> + entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
> + return;
> +
> + entry->addr = dst;
> + entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
> +
> + if (parent->bo->shadow) {
> + pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
> + pde = pd_addr + (entry - parent->entries) * 8;
> + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
> + }
> +
> + pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> + pde = pd_addr + (entry - parent->entries) * 8;
> + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
> }
>
> /**
> @@ -1172,18 +1228,25 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> unsigned nptes;
>
> /* walk over the address space and update the page tables */
> - for (addr = start; addr < end; addr += nptes) {
> - pt = amdgpu_vm_get_pt(params, addr);
> - if (!pt) {
> - pr_err("PT not found, aborting update_ptes\n");
> - return -EINVAL;
> - }
> + for (addr = start; addr < end; addr += nptes,
> + dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
> + struct amdgpu_vm_pt *entry, *parent;
> +
> + amdgpu_vm_get_entry(params, addr, &entry, &parent);
> + if (!entry)
> + return -ENOENT;
>
> if ((addr & ~mask) == (end & ~mask))
> nptes = end - addr;
> else
> nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>
> + amdgpu_vm_handle_huge_pages(params, entry, parent,
> + nptes, dst, flags);
> + if (entry->huge_page)
> + continue;
> +
> + pt = entry->bo;
> if (pt->shadow) {
> pe_start = amdgpu_bo_gpu_offset(pt->shadow);
> pe_start += (addr & mask) * 8;
> @@ -1195,8 +1258,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> pe_start += (addr & mask) * 8;
> params->func(params, pe_start, dst, nptes,
> AMDGPU_GPU_PAGE_SIZE, flags);
> -
> - dst += nptes * AMDGPU_GPU_PAGE_SIZE;
> }
>
> return 0;
> @@ -1330,6 +1391,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> /* padding, etc. */
> ndw = 64;
>
> + /* one PDE write for each huge page */
> + ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
> +
> if (src) {
> /* only copy commands needed */
> ndw += ncmds * 7;
> @@ -1409,6 +1473,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>
> error_free:
> amdgpu_job_free(job);
> + amdgpu_vm_invalidate_level(&vm->root);
> return r;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index aabe815..79145e6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry;
> /* TILED for VEGA10, reserved for older ASICs */
> #define AMDGPU_PTE_PRT (1ULL << 51)
>
> +/* PDE is handled as PTE for VEGA10 */
> +#define AMDGPU_PDE_PTE (1ULL << 54)
> +
> /* VEGA10 only */
> #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
> #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
> @@ -92,6 +95,7 @@ struct amdgpu_bo_list_entry;
> struct amdgpu_vm_pt {
> struct amdgpu_bo *bo;
> uint64_t addr;
> + bool huge_page;
>
> /* array of page tables, one for each directory entry */
> struct amdgpu_vm_pt *entries;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
prev parent reply other threads:[~2017-06-01 3:06 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-05-31 12:38 [PATCH 1/4] drm/amdgpu: further cleanup amdgpu_vm_need_pipeline_sync Christian König
[not found] ` <1496234326-2581-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-31 12:38 ` [PATCH 2/4] drm/amdgpu: simplify VM shadow handling Christian König
[not found] ` <1496234326-2581-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-06-01 2:19 ` zhoucm1
2017-05-31 12:38 ` [PATCH 3/4] drm/amdgpu: increase fragmentation size for Vega10 Christian König
[not found] ` <1496234326-2581-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-06-01 3:04 ` zhoucm1
2017-05-31 12:38 ` [PATCH 4/4] drm/amdgpu: enable huge page handling in the VM v3 Christian König
[not found] ` <1496234326-2581-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-06-01 3:06 ` zhoucm1 [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=592F84CE.4030709@amd.com \
--to=david1.zhou-5c7gfcevmho@public.gmane.org \
--cc=amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
--cc=deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.