From: Huang Rui <ray.huang-5C7GfCeVMHo@public.gmane.org>
To: "Christian König"
<ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: Re: [PATCH 5/5] drm/amdgpu: fix shadow BO restoring
Date: Mon, 17 Sep 2018 17:10:00 +0800 [thread overview]
Message-ID: <20180917090901.GD25456@hr-amur2> (raw)
In-Reply-To: <20180914134257.2196-5-christian.koenig-5C7GfCeVMHo@public.gmane.org>
On Fri, Sep 14, 2018 at 03:42:57PM +0200, Christian König wrote:
> Don't grab the reservation lock any more and simplify the handling quite
> a bit.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 109 ++++++++---------------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 46 ++++--------
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 +--
> 3 files changed, 43 insertions(+), 120 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 899342c6dfad..1cbc372964f8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2954,54 +2954,6 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
> return 0;
> }
>
> -/**
> - * amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM buffers
> - *
> - * @adev: amdgpu_device pointer
> - * @ring: amdgpu_ring for the engine handling the buffer operations
> - * @bo: amdgpu_bo buffer whose shadow is being restored
> - * @fence: dma_fence associated with the operation
> - *
> - * Restores the VRAM buffer contents from the shadow in GTT. Used to
> - * restore things like GPUVM page tables after a GPU reset where
> - * the contents of VRAM might be lost.
> - * Returns 0 on success, negative error code on failure.
> - */
> -static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
> - struct amdgpu_ring *ring,
> - struct amdgpu_bo *bo,
> - struct dma_fence **fence)
> -{
> - uint32_t domain;
> - int r;
> -
> - if (!bo->shadow)
> - return 0;
> -
> - r = amdgpu_bo_reserve(bo, true);
> - if (r)
> - return r;
> - domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
> - /* if bo has been evicted, then no need to recover */
> - if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
> - r = amdgpu_bo_validate(bo->shadow);
> - if (r) {
> - DRM_ERROR("bo validate failed!\n");
> - goto err;
> - }
> -
> - r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
> - NULL, fence, true);
> - if (r) {
> - DRM_ERROR("recover page table failed!\n");
> - goto err;
> - }
> - }
> -err:
> - amdgpu_bo_unreserve(bo);
> - return r;
> -}
> -
> /**
> * amdgpu_device_recover_vram - Recover some VRAM contents
> *
> @@ -3010,16 +2962,15 @@ static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
> * Restores the contents of VRAM buffers from the shadows in GTT. Used to
> * restore things like GPUVM page tables after a GPU reset where
> * the contents of VRAM might be lost.
> - * Returns 0 on success, 1 on failure.
> + *
> + * Returns:
> + * 0 on success, negative error code on failure.
> */
> static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
> {
> - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> - struct amdgpu_bo *bo, *tmp;
> struct dma_fence *fence = NULL, *next = NULL;
> - long r = 1;
> - int i = 0;
> - long tmo;
> + struct amdgpu_bo *shadow;
> + long r = 1, tmo;
>
> if (amdgpu_sriov_runtime(adev))
> tmo = msecs_to_jiffies(8000);
> @@ -3028,44 +2979,40 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
>
> DRM_INFO("recover vram bo from shadow start\n");
> mutex_lock(&adev->shadow_list_lock);
> - list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
> - next = NULL;
> - amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
> + list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
> +
> + /* No need to recover an evicted BO */
> + if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
> + shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
> + continue;
> +
> + r = amdgpu_bo_restore_shadow(shadow, &next);
> + if (r)
> + break;
> +
> if (fence) {
> r = dma_fence_wait_timeout(fence, false, tmo);
> - if (r == 0)
> - pr_err("wait fence %p[%d] timeout\n", fence, i);
> - else if (r < 0)
> - pr_err("wait fence %p[%d] interrupted\n", fence, i);
> - if (r < 1) {
> - dma_fence_put(fence);
> - fence = next;
> + dma_fence_put(fence);
> + fence = next;
> + if (r <= 0)
> break;
> - }
> - i++;
> + } else {
> + fence = next;
> }
> -
> - dma_fence_put(fence);
> - fence = next;
> }
> mutex_unlock(&adev->shadow_list_lock);
>
> - if (fence) {
> - r = dma_fence_wait_timeout(fence, false, tmo);
> - if (r == 0)
> - pr_err("wait fence %p[%d] timeout\n", fence, i);
> - else if (r < 0)
> - pr_err("wait fence %p[%d] interrupted\n", fence, i);
> -
> - }
> + if (fence)
> + tmo = dma_fence_wait_timeout(fence, false, tmo);
> dma_fence_put(fence);
>
> - if (r > 0)
> - DRM_INFO("recover vram bo from shadow done\n");
> - else
> + if (r <= 0 || tmo <= 0) {
> DRM_ERROR("recover vram bo from shadow failed\n");
> + return -EIO;
> + }
>
> - return (r > 0) ? 0 : 1;
> + DRM_INFO("recover vram bo from shadow done\n");
> + return 0;
> }
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 650c45c896f0..ca8a0b7a5ac3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -547,7 +547,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
> if (!r) {
> bo->shadow->parent = amdgpu_bo_ref(bo);
> mutex_lock(&adev->shadow_list_lock);
> - list_add_tail(&bo->shadow_list, &adev->shadow_list);
> + list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list);
> mutex_unlock(&adev->shadow_list_lock);
> }
>
> @@ -679,13 +679,10 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
> }
>
> /**
> - * amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object
> - * @adev: amdgpu device object
> - * @ring: amdgpu_ring for the engine handling the buffer operations
> - * @bo: &amdgpu_bo buffer to be restored
> - * @resv: reservation object with embedded fence
> + * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
> + *
> + * @shadow: &amdgpu_bo shadow to be restored
> * @fence: dma_fence associated with the operation
> - * @direct: whether to submit the job directly
> *
> * Copies a buffer object's shadow content back to the object.
> * This is used for recovering a buffer from its shadow in case of a gpu
> @@ -694,36 +691,19 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
> * Returns:
> * 0 for success or a negative error code on failure.
> */
> -int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
> - struct amdgpu_ring *ring,
> - struct amdgpu_bo *bo,
> - struct reservation_object *resv,
> - struct dma_fence **fence,
> - bool direct)
> +int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
>
> {
> - struct amdgpu_bo *shadow = bo->shadow;
> - uint64_t bo_addr, shadow_addr;
> - int r;
> -
> - if (!shadow)
> - return -EINVAL;
> -
> - bo_addr = amdgpu_bo_gpu_offset(bo);
> - shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
> -
> - r = reservation_object_reserve_shared(bo->tbo.resv);
> - if (r)
> - goto err;
> + struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
> + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> + uint64_t shadow_addr, parent_addr;
>
May I know why won't need the reservation_object_reserve_shared() here? Is
it because we only copy buffer from shadow parent bo instead of orignal bo?
Thanks,
Ray
> - r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
> - amdgpu_bo_size(bo), resv, fence,
> - direct, false);
> - if (!r)
> - amdgpu_bo_fence(bo, *fence, true);
> + shadow_addr = amdgpu_bo_gpu_offset(shadow);
> + parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
>
> -err:
> - return r;
> + return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
> + amdgpu_bo_size(shadow), NULL, fence,
> + true, false);
> }
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 64337ff2ad63..7d3312d0da11 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -273,12 +273,8 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
> struct reservation_object *resv,
> struct dma_fence **fence, bool direct);
> int amdgpu_bo_validate(struct amdgpu_bo *bo);
> -int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
> - struct amdgpu_ring *ring,
> - struct amdgpu_bo *bo,
> - struct reservation_object *resv,
> - struct dma_fence **fence,
> - bool direct);
> +int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
> + struct dma_fence **fence);
> uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
> uint32_t domain);
>
> --
> 2.14.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
next prev parent reply other threads:[~2018-09-17 9:10 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-09-14 13:42 [PATCH 1/5] drm/amdgpu: stop pipelining VM PDs/PTs moves Christian König
[not found] ` <20180914134257.2196-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-09-14 13:42 ` [PATCH 2/5] drm/amdgpu: always enable shadow BOs v2 Christian König
2018-09-14 13:42 ` [PATCH 3/5] drm/amdgpu: shadow BOs don't need any alignment Christian König
2018-09-14 13:42 ` [PATCH 4/5] drm/amdgpu: always recover VRAM during GPU recovery Christian König
2018-09-14 13:42 ` [PATCH 5/5] drm/amdgpu: fix shadow BO restoring Christian König
[not found] ` <20180914134257.2196-5-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-09-17 9:10 ` Huang Rui [this message]
2018-09-17 11:42 ` Christian König
[not found] ` <e1c78397-4a41-0f74-2049-5c63a279630b-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-09-18 8:40 ` Huang Rui
-- strict thread matches above, loose matches on Subject: below --
2018-09-11 9:55 [PATCH 1/5] drm/amdgpu: stop pipelining VM PDs/PTs moves Christian König
[not found] ` <20180911095602.10152-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-09-11 9:56 ` [PATCH 5/5] drm/amdgpu: fix shadow BO restoring Christian König
[not found] ` <20180911095602.10152-5-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-09-13 9:29 ` Zhang, Jerry(Junwei)
[not found] ` <073f24f7-df48-55f6-f4fb-2d250cfd2dd1-5C7GfCeVMHo@public.gmane.org>
2018-09-14 11:54 ` Christian König
[not found] ` <fa8fb53f-ff84-fb45-59c7-1d52ec57fba7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-09-18 6:15 ` Zhang, Jerry(Junwei)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180917090901.GD25456@hr-amur2 \
--to=ray.huang-5c7gfcevmho@public.gmane.org \
--cc=amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
--cc=ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox