All of lore.kernel.org
 help / color / mirror / Atom feed
From: zhoucm1 <david1.zhou-5C7GfCeVMHo@public.gmane.org>
To: Felix Kuehling <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: Re: [PATCH] drm/amdgpu: Optimize recursion in amdgpu_vm_update_level
Date: Wed, 12 Jul 2017 12:30:41 +0800	[thread overview]
Message-ID: <5965A5F1.80902@amd.com> (raw)
In-Reply-To: <1499831769-3764-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>

nice improvement, just some nitpick inline, otherwise the patch is 
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>.

On 2017年07月12日 11:56, Felix Kuehling wrote:
> When lots of virtual address spaces is used, there can be thousands
> of page table BOs. amdgpu_vm_update_level iterates over all of them
> recursively. In many cases only a few or none at all need to be
> updated. Minimize unnecessary code execution and memory usage in
> those cases.
>
> This speeds up memory mapping in a synthetic KFD memory mapping
> benchmark by roughly a factor two.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 109 +++++++++++++++++----------------
>   1 file changed, 55 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index ff5de3a..23b899b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1025,7 +1025,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   {
>   	struct amdgpu_bo *shadow;
>   	struct amdgpu_ring *ring = NULL;
> -	uint64_t pd_addr, shadow_addr = 0;
> +	uint64_t pd_addr = 0, shadow_addr = 0;
>   	uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
>   	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
>   	unsigned count = 0, pt_idx, ndw = 0;
> @@ -1044,48 +1044,19 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   
>   	WARN_ON(vm->use_cpu_for_update && shadow);
>   	if (vm->use_cpu_for_update && !shadow) {
> -		r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
> -		if (r)
> -			return r;
> -		r = amdgpu_vm_bo_wait(adev, parent->bo);
> -		if (unlikely(r)) {
> -			amdgpu_bo_kunmap(parent->bo);
> -			return r;
> -		}
> +		/* Defer kmapping until it's actually needed. Some
> +		 * PDBs may need no update at all
> +		 */
>   		params.func = amdgpu_vm_cpu_set_ptes;
> +		params.ib = (void *)(long)-1;
>   	} else {
> -		if (shadow) {
> -			r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> -			if (r)
> -				return r;
> -		}
> -		ring = container_of(vm->entity.sched, struct amdgpu_ring,
> -				    sched);
> -
> -		/* padding, etc. */
> -		ndw = 64;
> -
> -		/* assume the worst case */
> -		ndw += parent->last_entry_used * 6;
> -
> -		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> -
> -		if (shadow) {
> -			shadow_addr = amdgpu_bo_gpu_offset(shadow);
> -			ndw *= 2;
> -		} else {
> -			shadow_addr = 0;
> -		}
> -
> -		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> -		if (r)
> -			return r;
> -
> -		params.ib = &job->ibs[0];
> +		/* Defer IB allocation until it's actually
> +		 * needed. Some PDBs may need no update at all
> +		 */
> +		params.ib = NULL;
>   		params.func = amdgpu_vm_do_set_ptes;
>   	}
>   
> -
>   	/* walk over the address space and update the directory */
>   	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
>   		struct amdgpu_bo *bo = parent->entries[pt_idx].bo;
> @@ -1094,22 +1065,53 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   		if (bo == NULL)
>   			continue;
>   
> -		if (bo->shadow) {
> -			struct amdgpu_bo *pt_shadow = bo->shadow;
> -
> -			r = amdgpu_ttm_bind(&pt_shadow->tbo,
> -					    &pt_shadow->tbo.mem);
> -			if (r)
> -				return r;
> -		}
> -
> -		pt = amdgpu_bo_gpu_offset(bo);
> -		pt = amdgpu_gart_get_vm_pde(adev, pt);
> +		pt = amdgpu_gart_get_vm_pde(adev, bo->tbo.offset);
Getting offset from amdgpu_bo_gpu_offset(bo)  is better I think, 
although we can assume pt bo is a known domain.

>   		if (parent->entries[pt_idx].addr == pt)
>   			continue;
>   
>   		parent->entries[pt_idx].addr = pt;
>   
> +		if (!params.ib) {
> +			if (shadow) {
> +				r = amdgpu_ttm_bind(&shadow->tbo,
> +						    &shadow->tbo.mem);
> +				if (r)
> +					return r;
> +			}
> +
> +			ring = container_of(vm->entity.sched,
> +					    struct amdgpu_ring, sched);
> +
> +			/* padding, etc. */
> +			ndw = 64;
> +
> +			/* assume the worst case */
> +			ndw += (parent->last_entry_used - pt_idx) * 6;
> +
> +			pd_addr = parent->bo->tbo.offset;
> +
> +			if (shadow) {
> +				shadow_addr = shadow->tbo.offset;
> +				ndw *= 2;
> +			} else {
> +				shadow_addr = 0;
> +			}
> +			r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> +			if (r)
> +				return r;
> +
> +			params.ib = &job->ibs[0];
> +		} else if (!pd_addr) {
> +			r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
> +			if (r)
> +				return r;
> +			r = amdgpu_vm_bo_wait(adev, parent->bo);
> +			if (unlikely(r)) {
> +				amdgpu_bo_kunmap(parent->bo);
> +				return r;
> +			}
> +		}
> +
>   		pde = pd_addr + pt_idx * 8;
>   		if (((last_pde + 8 * count) != pde) ||
>   		    ((last_pt + incr * count) != pt) ||
> @@ -1148,9 +1150,9 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   
>   	if (params.func == amdgpu_vm_cpu_set_ptes)
>   		amdgpu_bo_kunmap(parent->bo);
> -	else if (params.ib->length_dw == 0) {
> +	else if (params.ib && params.ib->length_dw == 0) {
>   		amdgpu_job_free(job);
> -	} else {
> +	} else if (params.ib) {
>   		amdgpu_ring_pad_ib(ring, params.ib);
>   		amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv,
>   				 AMDGPU_FENCE_OWNER_VM);
> @@ -1166,8 +1168,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   
>   		amdgpu_bo_fence(parent->bo, fence, true);
>   		dma_fence_put(vm->last_dir_update);
> -		vm->last_dir_update = dma_fence_get(fence);
> -		dma_fence_put(fence);
> +		vm->last_dir_update = fence;
Here I prefer to keep previous code, the logic is more clear.:)
>   	}
>   	/*
>   	 * Recurse into the subdirectories. This recursion is harmless because
> @@ -1176,7 +1177,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
>   		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
>   
> -		if (!entry->bo)
> +		if (!entry->bo || !entry->entries)
In fact, !entry->entries would be checked at the begin of this function 
in next loop, but either way is ok to me here.

Regards,
David Zhou
>   			continue;
>   
>   		r = amdgpu_vm_update_level(adev, vm, entry, level + 1);

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  parent reply	other threads:[~2017-07-12  4:30 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-12  3:56 [PATCH] drm/amdgpu: Optimize recursion in amdgpu_vm_update_level Felix Kuehling
     [not found] ` <1499831769-3764-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2017-07-12  4:30   ` zhoucm1 [this message]
     [not found]     ` <5965A5F1.80902-5C7GfCeVMHo@public.gmane.org>
2017-07-12  4:56       ` Felix Kuehling
     [not found]         ` <4e7dee07-81d0-381d-579b-52e7e5969112-5C7GfCeVMHo@public.gmane.org>
2017-07-12  5:52           ` zhoucm1
2017-07-12  7:50   ` Christian König
     [not found]     ` <c50326a0-6813-d484-b289-5681b306d723-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-07-12 19:38       ` Felix Kuehling
     [not found]         ` <083c3f4e-72b3-1ad1-6d11-a9eba48e3c9c-5C7GfCeVMHo@public.gmane.org>
2017-07-13 13:45           ` Christian König

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5965A5F1.80902@amd.com \
    --to=david1.zhou-5c7gfcevmho@public.gmane.org \
    --cc=Felix.Kuehling-5C7GfCeVMHo@public.gmane.org \
    --cc=amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.