All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: once more fix the call order in amdgpu_ttm_move()
@ 2024-03-21 12:43 Christian König
  2024-03-21 14:12 ` Tvrtko Ursulin
  2024-03-21 21:01 ` Alex Deucher
  0 siblings, 2 replies; 7+ messages in thread
From: Christian König @ 2024-03-21 12:43 UTC (permalink / raw)
  To: alexander.deucher, amd-gfx

This reverts "drm/amdgpu: fix ftrace event amdgpu_bo_move always move
on same heap". The basic problem here is that after the move the old
location is simply not available any more.

Some fixes were suggested, but essentially we should call the move
notification before actually moving things, because only this way do we
have the correct order for DMA-buf and VM move notifications as well.

Also rework the statistics handling so that we don't update the eviction
counter before the move.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 15 +++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 48 ++++++++++++----------
 3 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 425cebcc5cbf..eb7d824763b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1245,19 +1245,20 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
  * amdgpu_bo_move_notify - notification about a memory move
  * @bo: pointer to a buffer object
  * @evict: if this move is evicting the buffer from the graphics address space
+ * @new_mem: new resource for backing the BO
  *
  * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs
  * bookkeeping.
  * TTM driver callback which is called when ttm moves a buffer.
  */
-void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
+void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
+			   bool evict,
+			   struct ttm_resource *new_mem)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+	struct ttm_resource *old_mem = bo->resource;
 	struct amdgpu_bo *abo;
 
-	if (!amdgpu_bo_is_amdgpu_bo(bo))
-		return;
-
 	abo = ttm_to_amdgpu_bo(bo);
 	amdgpu_vm_bo_invalidate(adev, abo, evict);
 
@@ -1267,9 +1268,9 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
 	    bo->resource->mem_type != TTM_PL_SYSTEM)
 		dma_buf_move_notify(abo->tbo.base.dma_buf);
 
-	/* remember the eviction */
-	if (evict)
-		atomic64_inc(&adev->num_evictions);
+	/* move_notify is called before move happens */
+	trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1,
+			     old_mem ? old_mem->mem_type : -1);
 }
 
 void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index a3ea8a82db23..d28e21baef16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -344,7 +344,9 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
 int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
 			   size_t buffer_size, uint32_t *metadata_size,
 			   uint64_t *flags);
-void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict);
+void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
+			   bool evict,
+			   struct ttm_resource *new_mem);
 void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
 vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index a5ceec7820cf..460b23918bfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -471,14 +471,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 
 	if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
 			 bo->ttm == NULL)) {
+		amdgpu_bo_move_notify(bo, evict, new_mem);
 		ttm_bo_move_null(bo, new_mem);
-		goto out;
+		return 0;
 	}
 	if (old_mem->mem_type == TTM_PL_SYSTEM &&
 	    (new_mem->mem_type == TTM_PL_TT ||
 	     new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
+		amdgpu_bo_move_notify(bo, evict, new_mem);
 		ttm_bo_move_null(bo, new_mem);
-		goto out;
+		return 0;
 	}
 	if ((old_mem->mem_type == TTM_PL_TT ||
 	     old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
@@ -488,9 +490,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 			return r;
 
 		amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
+		amdgpu_bo_move_notify(bo, evict, new_mem);
 		ttm_resource_free(bo, &bo->resource);
 		ttm_bo_assign_mem(bo, new_mem);
-		goto out;
+		return 0;
 	}
 
 	if (old_mem->mem_type == AMDGPU_PL_GDS ||
@@ -502,8 +505,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 	    new_mem->mem_type == AMDGPU_PL_OA ||
 	    new_mem->mem_type == AMDGPU_PL_DOORBELL) {
 		/* Nothing to save here */
+		amdgpu_bo_move_notify(bo, evict, new_mem);
 		ttm_bo_move_null(bo, new_mem);
-		goto out;
+		return 0;
 	}
 
 	if (bo->type == ttm_bo_type_device &&
@@ -515,22 +519,23 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 		abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 	}
 
-	if (adev->mman.buffer_funcs_enabled) {
-		if (((old_mem->mem_type == TTM_PL_SYSTEM &&
-		      new_mem->mem_type == TTM_PL_VRAM) ||
-		     (old_mem->mem_type == TTM_PL_VRAM &&
-		      new_mem->mem_type == TTM_PL_SYSTEM))) {
-			hop->fpfn = 0;
-			hop->lpfn = 0;
-			hop->mem_type = TTM_PL_TT;
-			hop->flags = TTM_PL_FLAG_TEMPORARY;
-			return -EMULTIHOP;
-		}
+	if (adev->mman.buffer_funcs_enabled &&
+	    ((old_mem->mem_type == TTM_PL_SYSTEM &&
+	      new_mem->mem_type == TTM_PL_VRAM) ||
+	     (old_mem->mem_type == TTM_PL_VRAM &&
+	      new_mem->mem_type == TTM_PL_SYSTEM))) {
+		hop->fpfn = 0;
+		hop->lpfn = 0;
+		hop->mem_type = TTM_PL_TT;
+		hop->flags = TTM_PL_FLAG_TEMPORARY;
+		return -EMULTIHOP;
+	}
 
+	amdgpu_bo_move_notify(bo, evict, new_mem);
+	if (adev->mman.buffer_funcs_enabled)
 		r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
-	} else {
+	else
 		r = -ENODEV;
-	}
 
 	if (r) {
 		/* Check that all memory is CPU accessible */
@@ -545,11 +550,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 			return r;
 	}
 
-	trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
-out:
-	/* update statistics */
+	/* update statistics after the move */
+	if (evict)
+		atomic64_inc(&adev->num_evictions);
 	atomic64_add(bo->base.size, &adev->num_bytes_moved);
-	amdgpu_bo_move_notify(bo, evict);
 	return 0;
 }
 
@@ -1551,7 +1555,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 static void
 amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
 {
-	amdgpu_bo_move_notify(bo, false);
+	amdgpu_bo_move_notify(bo, false, NULL);
 }
 
 static struct ttm_device_funcs amdgpu_bo_driver = {
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: once more fix the call oder in amdgpu_ttm_move()
  2024-03-21 12:43 [PATCH] drm/amdgpu: once more fix the call oder in amdgpu_ttm_move() Christian König
@ 2024-03-21 14:12 ` Tvrtko Ursulin
  2024-03-21 14:37   ` Christian König
  2024-03-21 21:01 ` Alex Deucher
  1 sibling, 1 reply; 7+ messages in thread
From: Tvrtko Ursulin @ 2024-03-21 14:12 UTC (permalink / raw)
  To: Christian König, alexander.deucher, amd-gfx


On 21/03/2024 12:43, Christian König wrote:
> This reverts drm/amdgpu: fix ftrace event amdgpu_bo_move always move
> on same heap. The basic problem here is that after the move the old
> location is simply not available any more.
> 
> Some fixes where suggested, but essentially we should call the move
> notification before actually moving things because only this way we have
> the correct order for DMA-buf and VM move notifications as well.
> 
> Also rework the statistic handling so that we don't update the eviction
> counter before the move.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Don't forget:

Fixes: 94aeb4117343 ("drm/amdgpu: fix ftrace event amdgpu_bo_move always 
move on same heap")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3171

;)

Regards,

Tvrtko

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 15 +++----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  4 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 48 ++++++++++++----------
>   3 files changed, 37 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 425cebcc5cbf..eb7d824763b9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1245,19 +1245,20 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
>    * amdgpu_bo_move_notify - notification about a memory move
>    * @bo: pointer to a buffer object
>    * @evict: if this move is evicting the buffer from the graphics address space
> + * @new_mem: new resource for backing the BO
>    *
>    * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs
>    * bookkeeping.
>    * TTM driver callback which is called when ttm moves a buffer.
>    */
> -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
> +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
> +			   bool evict,
> +			   struct ttm_resource *new_mem)
>   {
>   	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> +	struct ttm_resource *old_mem = bo->resource;
>   	struct amdgpu_bo *abo;
>   
> -	if (!amdgpu_bo_is_amdgpu_bo(bo))
> -		return;
> -
>   	abo = ttm_to_amdgpu_bo(bo);
>   	amdgpu_vm_bo_invalidate(adev, abo, evict);
>   
> @@ -1267,9 +1268,9 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
>   	    bo->resource->mem_type != TTM_PL_SYSTEM)
>   		dma_buf_move_notify(abo->tbo.base.dma_buf);
>   
> -	/* remember the eviction */
> -	if (evict)
> -		atomic64_inc(&adev->num_evictions);
> +	/* move_notify is called before move happens */
> +	trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1,
> +			     old_mem ? old_mem->mem_type : -1);
>   }
>   
>   void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index a3ea8a82db23..d28e21baef16 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -344,7 +344,9 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
>   int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
>   			   size_t buffer_size, uint32_t *metadata_size,
>   			   uint64_t *flags);
> -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict);
> +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
> +			   bool evict,
> +			   struct ttm_resource *new_mem);
>   void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
>   vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
>   void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index a5ceec7820cf..460b23918bfc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -471,14 +471,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>   
>   	if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
>   			 bo->ttm == NULL)) {
> +		amdgpu_bo_move_notify(bo, evict, new_mem);
>   		ttm_bo_move_null(bo, new_mem);
> -		goto out;
> +		return 0;
>   	}
>   	if (old_mem->mem_type == TTM_PL_SYSTEM &&
>   	    (new_mem->mem_type == TTM_PL_TT ||
>   	     new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
> +		amdgpu_bo_move_notify(bo, evict, new_mem);
>   		ttm_bo_move_null(bo, new_mem);
> -		goto out;
> +		return 0;
>   	}
>   	if ((old_mem->mem_type == TTM_PL_TT ||
>   	     old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
> @@ -488,9 +490,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>   			return r;
>   
>   		amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
> +		amdgpu_bo_move_notify(bo, evict, new_mem);
>   		ttm_resource_free(bo, &bo->resource);
>   		ttm_bo_assign_mem(bo, new_mem);
> -		goto out;
> +		return 0;
>   	}
>   
>   	if (old_mem->mem_type == AMDGPU_PL_GDS ||
> @@ -502,8 +505,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>   	    new_mem->mem_type == AMDGPU_PL_OA ||
>   	    new_mem->mem_type == AMDGPU_PL_DOORBELL) {
>   		/* Nothing to save here */
> +		amdgpu_bo_move_notify(bo, evict, new_mem);
>   		ttm_bo_move_null(bo, new_mem);
> -		goto out;
> +		return 0;
>   	}
>   
>   	if (bo->type == ttm_bo_type_device &&
> @@ -515,22 +519,23 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>   		abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>   	}
>   
> -	if (adev->mman.buffer_funcs_enabled) {
> -		if (((old_mem->mem_type == TTM_PL_SYSTEM &&
> -		      new_mem->mem_type == TTM_PL_VRAM) ||
> -		     (old_mem->mem_type == TTM_PL_VRAM &&
> -		      new_mem->mem_type == TTM_PL_SYSTEM))) {
> -			hop->fpfn = 0;
> -			hop->lpfn = 0;
> -			hop->mem_type = TTM_PL_TT;
> -			hop->flags = TTM_PL_FLAG_TEMPORARY;
> -			return -EMULTIHOP;
> -		}
> +	if (adev->mman.buffer_funcs_enabled &&
> +	    ((old_mem->mem_type == TTM_PL_SYSTEM &&
> +	      new_mem->mem_type == TTM_PL_VRAM) ||
> +	     (old_mem->mem_type == TTM_PL_VRAM &&
> +	      new_mem->mem_type == TTM_PL_SYSTEM))) {
> +		hop->fpfn = 0;
> +		hop->lpfn = 0;
> +		hop->mem_type = TTM_PL_TT;
> +		hop->flags = TTM_PL_FLAG_TEMPORARY;
> +		return -EMULTIHOP;
> +	}
>   
> +	amdgpu_bo_move_notify(bo, evict, new_mem);
> +	if (adev->mman.buffer_funcs_enabled)
>   		r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
> -	} else {
> +	else
>   		r = -ENODEV;
> -	}
>   
>   	if (r) {
>   		/* Check that all memory is CPU accessible */
> @@ -545,11 +550,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>   			return r;
>   	}
>   
> -	trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
> -out:
> -	/* update statistics */
> +	/* update statistics after the move */
> +	if (evict)
> +		atomic64_inc(&adev->num_evictions);
>   	atomic64_add(bo->base.size, &adev->num_bytes_moved);
> -	amdgpu_bo_move_notify(bo, evict);
>   	return 0;
>   }
>   
> @@ -1551,7 +1555,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>   static void
>   amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
>   {
> -	amdgpu_bo_move_notify(bo, false);
> +	amdgpu_bo_move_notify(bo, false, NULL);
>   }
>   
>   static struct ttm_device_funcs amdgpu_bo_driver = {

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: once more fix the call oder in amdgpu_ttm_move()
  2024-03-21 14:12 ` Tvrtko Ursulin
@ 2024-03-21 14:37   ` Christian König
  2024-04-05 14:41     ` Oleksandr Natalenko
  2024-04-18 16:10     ` Alex Deucher
  0 siblings, 2 replies; 7+ messages in thread
From: Christian König @ 2024-03-21 14:37 UTC (permalink / raw)
  To: Tvrtko Ursulin, alexander.deucher, amd-gfx

Am 21.03.24 um 15:12 schrieb Tvrtko Ursulin:
>
> On 21/03/2024 12:43, Christian König wrote:
>> This reverts drm/amdgpu: fix ftrace event amdgpu_bo_move always move
>> on same heap. The basic problem here is that after the move the old
>> location is simply not available any more.
>>
>> Some fixes where suggested, but essentially we should call the move
>> notification before actually moving things because only this way we have
>> the correct order for DMA-buf and VM move notifications as well.
>>
>> Also rework the statistic handling so that we don't update the eviction
>> counter before the move.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>
> Don't forget:
>
> Fixes: 94aeb4117343 ("drm/amdgpu: fix ftrace event amdgpu_bo_move 
> always move on same heap")
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3171

Ah, thanks. I already wanted to ask if there is any bug report about 
that as well.

Regards,
Christian.

>
> ;)
>
> Regards,
>
> Tvrtko
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 15 +++----
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  4 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 48 ++++++++++++----------
>>   3 files changed, 37 insertions(+), 30 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index 425cebcc5cbf..eb7d824763b9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -1245,19 +1245,20 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo 
>> *bo, void *buffer,
>>    * amdgpu_bo_move_notify - notification about a memory move
>>    * @bo: pointer to a buffer object
>>    * @evict: if this move is evicting the buffer from the graphics 
>> address space
>> + * @new_mem: new resource for backing the BO
>>    *
>>    * Marks the corresponding &amdgpu_bo buffer object as invalid, 
>> also performs
>>    * bookkeeping.
>>    * TTM driver callback which is called when ttm moves a buffer.
>>    */
>> -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
>> +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
>> +               bool evict,
>> +               struct ttm_resource *new_mem)
>>   {
>>       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
>> +    struct ttm_resource *old_mem = bo->resource;
>>       struct amdgpu_bo *abo;
>>   -    if (!amdgpu_bo_is_amdgpu_bo(bo))
>> -        return;
>> -
>>       abo = ttm_to_amdgpu_bo(bo);
>>       amdgpu_vm_bo_invalidate(adev, abo, evict);
>>   @@ -1267,9 +1268,9 @@ void amdgpu_bo_move_notify(struct 
>> ttm_buffer_object *bo, bool evict)
>>           bo->resource->mem_type != TTM_PL_SYSTEM)
>>           dma_buf_move_notify(abo->tbo.base.dma_buf);
>>   -    /* remember the eviction */
>> -    if (evict)
>> -        atomic64_inc(&adev->num_evictions);
>> +    /* move_notify is called before move happens */
>> +    trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1,
>> +                 old_mem ? old_mem->mem_type : -1);
>>   }
>>     void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index a3ea8a82db23..d28e21baef16 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -344,7 +344,9 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, 
>> void *metadata,
>>   int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
>>                  size_t buffer_size, uint32_t *metadata_size,
>>                  uint64_t *flags);
>> -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict);
>> +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
>> +               bool evict,
>> +               struct ttm_resource *new_mem);
>>   void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
>>   vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object 
>> *bo);
>>   void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index a5ceec7820cf..460b23918bfc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -471,14 +471,16 @@ static int amdgpu_bo_move(struct 
>> ttm_buffer_object *bo, bool evict,
>>         if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
>>                bo->ttm == NULL)) {
>> +        amdgpu_bo_move_notify(bo, evict, new_mem);
>>           ttm_bo_move_null(bo, new_mem);
>> -        goto out;
>> +        return 0;
>>       }
>>       if (old_mem->mem_type == TTM_PL_SYSTEM &&
>>           (new_mem->mem_type == TTM_PL_TT ||
>>            new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
>> +        amdgpu_bo_move_notify(bo, evict, new_mem);
>>           ttm_bo_move_null(bo, new_mem);
>> -        goto out;
>> +        return 0;
>>       }
>>       if ((old_mem->mem_type == TTM_PL_TT ||
>>            old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
>> @@ -488,9 +490,10 @@ static int amdgpu_bo_move(struct 
>> ttm_buffer_object *bo, bool evict,
>>               return r;
>>             amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
>> +        amdgpu_bo_move_notify(bo, evict, new_mem);
>>           ttm_resource_free(bo, &bo->resource);
>>           ttm_bo_assign_mem(bo, new_mem);
>> -        goto out;
>> +        return 0;
>>       }
>>         if (old_mem->mem_type == AMDGPU_PL_GDS ||
>> @@ -502,8 +505,9 @@ static int amdgpu_bo_move(struct 
>> ttm_buffer_object *bo, bool evict,
>>           new_mem->mem_type == AMDGPU_PL_OA ||
>>           new_mem->mem_type == AMDGPU_PL_DOORBELL) {
>>           /* Nothing to save here */
>> +        amdgpu_bo_move_notify(bo, evict, new_mem);
>>           ttm_bo_move_null(bo, new_mem);
>> -        goto out;
>> +        return 0;
>>       }
>>         if (bo->type == ttm_bo_type_device &&
>> @@ -515,22 +519,23 @@ static int amdgpu_bo_move(struct 
>> ttm_buffer_object *bo, bool evict,
>>           abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>>       }
>>   -    if (adev->mman.buffer_funcs_enabled) {
>> -        if (((old_mem->mem_type == TTM_PL_SYSTEM &&
>> -              new_mem->mem_type == TTM_PL_VRAM) ||
>> -             (old_mem->mem_type == TTM_PL_VRAM &&
>> -              new_mem->mem_type == TTM_PL_SYSTEM))) {
>> -            hop->fpfn = 0;
>> -            hop->lpfn = 0;
>> -            hop->mem_type = TTM_PL_TT;
>> -            hop->flags = TTM_PL_FLAG_TEMPORARY;
>> -            return -EMULTIHOP;
>> -        }
>> +    if (adev->mman.buffer_funcs_enabled &&
>> +        ((old_mem->mem_type == TTM_PL_SYSTEM &&
>> +          new_mem->mem_type == TTM_PL_VRAM) ||
>> +         (old_mem->mem_type == TTM_PL_VRAM &&
>> +          new_mem->mem_type == TTM_PL_SYSTEM))) {
>> +        hop->fpfn = 0;
>> +        hop->lpfn = 0;
>> +        hop->mem_type = TTM_PL_TT;
>> +        hop->flags = TTM_PL_FLAG_TEMPORARY;
>> +        return -EMULTIHOP;
>> +    }
>>   +    amdgpu_bo_move_notify(bo, evict, new_mem);
>> +    if (adev->mman.buffer_funcs_enabled)
>>           r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
>> -    } else {
>> +    else
>>           r = -ENODEV;
>> -    }
>>         if (r) {
>>           /* Check that all memory is CPU accessible */
>> @@ -545,11 +550,10 @@ static int amdgpu_bo_move(struct 
>> ttm_buffer_object *bo, bool evict,
>>               return r;
>>       }
>>   -    trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
>> -out:
>> -    /* update statistics */
>> +    /* update statistics after the move */
>> +    if (evict)
>> +        atomic64_inc(&adev->num_evictions);
>>       atomic64_add(bo->base.size, &adev->num_bytes_moved);
>> -    amdgpu_bo_move_notify(bo, evict);
>>       return 0;
>>   }
>>   @@ -1551,7 +1555,7 @@ static int amdgpu_ttm_access_memory(struct 
>> ttm_buffer_object *bo,
>>   static void
>>   amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
>>   {
>> -    amdgpu_bo_move_notify(bo, false);
>> +    amdgpu_bo_move_notify(bo, false, NULL);
>>   }
>>     static struct ttm_device_funcs amdgpu_bo_driver = {


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: once more fix the call oder in amdgpu_ttm_move()
  2024-03-21 12:43 [PATCH] drm/amdgpu: once more fix the call oder in amdgpu_ttm_move() Christian König
  2024-03-21 14:12 ` Tvrtko Ursulin
@ 2024-03-21 21:01 ` Alex Deucher
  1 sibling, 0 replies; 7+ messages in thread
From: Alex Deucher @ 2024-03-21 21:01 UTC (permalink / raw)
  To: Christian König; +Cc: alexander.deucher, amd-gfx

On Thu, Mar 21, 2024 at 8:52 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> This reverts drm/amdgpu: fix ftrace event amdgpu_bo_move always move
> on same heap. The basic problem here is that after the move the old
> location is simply not available any more.
>
> Some fixes where suggested, but essentially we should call the move

where -> were

> notification before actually moving things because only this way we have
> the correct order for DMA-buf and VM move notifications as well.
>
> Also rework the statistic handling so that we don't update the eviction
> counter before the move.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 15 +++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 48 ++++++++++++----------
>  3 files changed, 37 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 425cebcc5cbf..eb7d824763b9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1245,19 +1245,20 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
>   * amdgpu_bo_move_notify - notification about a memory move
>   * @bo: pointer to a buffer object
>   * @evict: if this move is evicting the buffer from the graphics address space
> + * @new_mem: new resource for backing the BO
>   *
>   * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs
>   * bookkeeping.
>   * TTM driver callback which is called when ttm moves a buffer.
>   */
> -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
> +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
> +                          bool evict,
> +                          struct ttm_resource *new_mem)
>  {
>         struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> +       struct ttm_resource *old_mem = bo->resource;
>         struct amdgpu_bo *abo;
>
> -       if (!amdgpu_bo_is_amdgpu_bo(bo))
> -               return;
> -
>         abo = ttm_to_amdgpu_bo(bo);
>         amdgpu_vm_bo_invalidate(adev, abo, evict);
>
> @@ -1267,9 +1268,9 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
>             bo->resource->mem_type != TTM_PL_SYSTEM)
>                 dma_buf_move_notify(abo->tbo.base.dma_buf);
>
> -       /* remember the eviction */
> -       if (evict)
> -               atomic64_inc(&adev->num_evictions);
> +       /* move_notify is called before move happens */
> +       trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1,
> +                            old_mem ? old_mem->mem_type : -1);
>  }
>
>  void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index a3ea8a82db23..d28e21baef16 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -344,7 +344,9 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
>  int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
>                            size_t buffer_size, uint32_t *metadata_size,
>                            uint64_t *flags);
> -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict);
> +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
> +                          bool evict,
> +                          struct ttm_resource *new_mem);
>  void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
>  vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
>  void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index a5ceec7820cf..460b23918bfc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -471,14 +471,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>
>         if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
>                          bo->ttm == NULL)) {
> +               amdgpu_bo_move_notify(bo, evict, new_mem);
>                 ttm_bo_move_null(bo, new_mem);
> -               goto out;
> +               return 0;
>         }
>         if (old_mem->mem_type == TTM_PL_SYSTEM &&
>             (new_mem->mem_type == TTM_PL_TT ||
>              new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
> +               amdgpu_bo_move_notify(bo, evict, new_mem);
>                 ttm_bo_move_null(bo, new_mem);
> -               goto out;
> +               return 0;
>         }
>         if ((old_mem->mem_type == TTM_PL_TT ||
>              old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
> @@ -488,9 +490,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>                         return r;
>
>                 amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
> +               amdgpu_bo_move_notify(bo, evict, new_mem);
>                 ttm_resource_free(bo, &bo->resource);
>                 ttm_bo_assign_mem(bo, new_mem);
> -               goto out;
> +               return 0;
>         }
>
>         if (old_mem->mem_type == AMDGPU_PL_GDS ||
> @@ -502,8 +505,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>             new_mem->mem_type == AMDGPU_PL_OA ||
>             new_mem->mem_type == AMDGPU_PL_DOORBELL) {
>                 /* Nothing to save here */
> +               amdgpu_bo_move_notify(bo, evict, new_mem);
>                 ttm_bo_move_null(bo, new_mem);
> -               goto out;
> +               return 0;
>         }
>
>         if (bo->type == ttm_bo_type_device &&
> @@ -515,22 +519,23 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>                 abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>         }
>
> -       if (adev->mman.buffer_funcs_enabled) {
> -               if (((old_mem->mem_type == TTM_PL_SYSTEM &&
> -                     new_mem->mem_type == TTM_PL_VRAM) ||
> -                    (old_mem->mem_type == TTM_PL_VRAM &&
> -                     new_mem->mem_type == TTM_PL_SYSTEM))) {
> -                       hop->fpfn = 0;
> -                       hop->lpfn = 0;
> -                       hop->mem_type = TTM_PL_TT;
> -                       hop->flags = TTM_PL_FLAG_TEMPORARY;
> -                       return -EMULTIHOP;
> -               }
> +       if (adev->mman.buffer_funcs_enabled &&
> +           ((old_mem->mem_type == TTM_PL_SYSTEM &&
> +             new_mem->mem_type == TTM_PL_VRAM) ||
> +            (old_mem->mem_type == TTM_PL_VRAM &&
> +             new_mem->mem_type == TTM_PL_SYSTEM))) {
> +               hop->fpfn = 0;
> +               hop->lpfn = 0;
> +               hop->mem_type = TTM_PL_TT;
> +               hop->flags = TTM_PL_FLAG_TEMPORARY;
> +               return -EMULTIHOP;
> +       }
>
> +       amdgpu_bo_move_notify(bo, evict, new_mem);
> +       if (adev->mman.buffer_funcs_enabled)
>                 r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
> -       } else {
> +       else
>                 r = -ENODEV;
> -       }
>
>         if (r) {
>                 /* Check that all memory is CPU accessible */
> @@ -545,11 +550,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>                         return r;
>         }
>
> -       trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
> -out:
> -       /* update statistics */
> +       /* update statistics after the move */
> +       if (evict)
> +               atomic64_inc(&adev->num_evictions);
>         atomic64_add(bo->base.size, &adev->num_bytes_moved);
> -       amdgpu_bo_move_notify(bo, evict);
>         return 0;
>  }
>
> @@ -1551,7 +1555,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>  static void
>  amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
>  {
> -       amdgpu_bo_move_notify(bo, false);
> +       amdgpu_bo_move_notify(bo, false, NULL);
>  }
>
>  static struct ttm_device_funcs amdgpu_bo_driver = {
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: once more fix the call oder in amdgpu_ttm_move()
  2024-03-21 14:37   ` Christian König
@ 2024-04-05 14:41     ` Oleksandr Natalenko
  2024-04-18 16:10     ` Alex Deucher
  1 sibling, 0 replies; 7+ messages in thread
From: Oleksandr Natalenko @ 2024-04-05 14:41 UTC (permalink / raw)
  To: Christian König; +Cc: Tvrtko Ursulin, alexander.deucher, amd-gfx

[-- Attachment #1: Type: text/plain, Size: 10818 bytes --]

Hello Christian.

On čtvrtek 21. března 2024 15:37:27, CEST Christian König wrote:
> Am 21.03.24 um 15:12 schrieb Tvrtko Ursulin:
> >
> > On 21/03/2024 12:43, Christian König wrote:
> >> This reverts drm/amdgpu: fix ftrace event amdgpu_bo_move always move
> >> on same heap. The basic problem here is that after the move the old
> >> location is simply not available any more.
> >>
> >> Some fixes were suggested, but essentially we should call the move
> >> notification before actually moving things because only this way we have
> >> the correct order for DMA-buf and VM move notifications as well.
> >>
> >> Also rework the statistic handling so that we don't update the eviction
> >> counter before the move.
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> >
> > Don't forget:
> >
> > Fixes: 94aeb4117343 ("drm/amdgpu: fix ftrace event amdgpu_bo_move 
> > always move on same heap")
> > Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3171
> 
> Ah, thanks. I already wanted to ask if there is any bug report about 
> that as well.

Do you happen to know whether this patch has any prerequisites that also need to be picked when backporting your fix to v6.8? I tried applying just this single patch on top of plain v6.8, and it triggered lots of BUGs:

```
BUG: unable to handle page fault for address: 00000000001001c0
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
CPU: 3 PID: 378 Comm: kworker/u68:0 Not tainted 6.8.0-pf3 #1 30fa7177996c08e3c7c351ca59508acf72424acd
Hardware name: ASUS System Product Name/Pro WS X570-ACE, BIOS 4702 10/20/2023
Workqueue: ttm ttm_bo_delayed_delete [ttm]
RIP: 0010:amdgpu_vm_bo_invalidate+0x22/0x390 [amdgpu]
Code: 90 90 90 90 90 90 90 90 66 0f 1f 00 0f 1f 44 00 00 41 56 41 55 41 54 55 48 89 f5 53 48 8b 86 58 02 00 00 48 85 c0 74 16 31 c9 <83> b8 c0 01 00 00 01 0f 84 cb 02 00 00 48 39 cd 48 0f 44 e8 48 8b
RSP: 0018:ffffa43440e47e00 EFLAGS: 00010246
RAX: 0000000000100000 RBX: ffff93419508ae00 RCX: 0000000000000000
RDX: 0000000000000000 RSI: ffff93419508ada8 RDI: ffff934195900000
RBP: ffff93419508ada8 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000001 R11: ffff934180a6ea80 R12: ffff93418302f060
R13: ffff93419508ada8 R14: ffff93418d30de05 R15: ffff93419508afb0
FS:  0000000000000000(0000) GS:ffff93602eac0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000001001c0 CR3: 000000010a7ac000 CR4: 0000000000f50ef0
PKRU: 55555554
Call Trace:
 <TASK>
 amdgpu_bo_move_notify+0x3a/0xf0 [amdgpu 84c82d766599797bed2ef6971fa457123a4823ba]
 ttm_bo_delayed_delete+0x59/0xd0 [ttm d0d6b8ddf810a50c01887c0fcb83d6ad65d08ff1]
 process_one_work+0x17b/0x340
 worker_thread+0x301/0x490
 kthread+0xe8/0x120
 ret_from_fork+0x34/0x50
 ret_from_fork_asm+0x1b/0x30
 </TASK>
```

Thank you.

> Regards,
> Christian.
> 
> >
> > ;)
> >
> > Regards,
> >
> > Tvrtko
> >
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 15 +++----
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  4 +-
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 48 ++++++++++++----------
> >>   3 files changed, 37 insertions(+), 30 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> index 425cebcc5cbf..eb7d824763b9 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> @@ -1245,19 +1245,20 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo 
> >> *bo, void *buffer,
> >>    * amdgpu_bo_move_notify - notification about a memory move
> >>    * @bo: pointer to a buffer object
> >>    * @evict: if this move is evicting the buffer from the graphics 
> >> address space
> >> + * @new_mem: new resource for backing the BO
> >>    *
> >>    * Marks the corresponding &amdgpu_bo buffer object as invalid, 
> >> also performs
> >>    * bookkeeping.
> >>    * TTM driver callback which is called when ttm moves a buffer.
> >>    */
> >> -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
> >> +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
> >> +               bool evict,
> >> +               struct ttm_resource *new_mem)
> >>   {
> >>       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> >> +    struct ttm_resource *old_mem = bo->resource;
> >>       struct amdgpu_bo *abo;
> >>   -    if (!amdgpu_bo_is_amdgpu_bo(bo))
> >> -        return;
> >> -
> >>       abo = ttm_to_amdgpu_bo(bo);
> >>       amdgpu_vm_bo_invalidate(adev, abo, evict);
> >>   @@ -1267,9 +1268,9 @@ void amdgpu_bo_move_notify(struct 
> >> ttm_buffer_object *bo, bool evict)
> >>           bo->resource->mem_type != TTM_PL_SYSTEM)
> >>           dma_buf_move_notify(abo->tbo.base.dma_buf);
> >>   -    /* remember the eviction */
> >> -    if (evict)
> >> -        atomic64_inc(&adev->num_evictions);
> >> +    /* move_notify is called before move happens */
> >> +    trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1,
> >> +                 old_mem ? old_mem->mem_type : -1);
> >>   }
> >>     void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >> index a3ea8a82db23..d28e21baef16 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >> @@ -344,7 +344,9 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, 
> >> void *metadata,
> >>   int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
> >>                  size_t buffer_size, uint32_t *metadata_size,
> >>                  uint64_t *flags);
> >> -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict);
> >> +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
> >> +               bool evict,
> >> +               struct ttm_resource *new_mem);
> >>   void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
> >>   vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object 
> >> *bo);
> >>   void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> index a5ceec7820cf..460b23918bfc 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> @@ -471,14 +471,16 @@ static int amdgpu_bo_move(struct 
> >> ttm_buffer_object *bo, bool evict,
> >>         if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
> >>                bo->ttm == NULL)) {
> >> +        amdgpu_bo_move_notify(bo, evict, new_mem);
> >>           ttm_bo_move_null(bo, new_mem);
> >> -        goto out;
> >> +        return 0;
> >>       }
> >>       if (old_mem->mem_type == TTM_PL_SYSTEM &&
> >>           (new_mem->mem_type == TTM_PL_TT ||
> >>            new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
> >> +        amdgpu_bo_move_notify(bo, evict, new_mem);
> >>           ttm_bo_move_null(bo, new_mem);
> >> -        goto out;
> >> +        return 0;
> >>       }
> >>       if ((old_mem->mem_type == TTM_PL_TT ||
> >>            old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
> >> @@ -488,9 +490,10 @@ static int amdgpu_bo_move(struct 
> >> ttm_buffer_object *bo, bool evict,
> >>               return r;
> >>             amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
> >> +        amdgpu_bo_move_notify(bo, evict, new_mem);
> >>           ttm_resource_free(bo, &bo->resource);
> >>           ttm_bo_assign_mem(bo, new_mem);
> >> -        goto out;
> >> +        return 0;
> >>       }
> >>         if (old_mem->mem_type == AMDGPU_PL_GDS ||
> >> @@ -502,8 +505,9 @@ static int amdgpu_bo_move(struct 
> >> ttm_buffer_object *bo, bool evict,
> >>           new_mem->mem_type == AMDGPU_PL_OA ||
> >>           new_mem->mem_type == AMDGPU_PL_DOORBELL) {
> >>           /* Nothing to save here */
> >> +        amdgpu_bo_move_notify(bo, evict, new_mem);
> >>           ttm_bo_move_null(bo, new_mem);
> >> -        goto out;
> >> +        return 0;
> >>       }
> >>         if (bo->type == ttm_bo_type_device &&
> >> @@ -515,22 +519,23 @@ static int amdgpu_bo_move(struct 
> >> ttm_buffer_object *bo, bool evict,
> >>           abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> >>       }
> >>   -    if (adev->mman.buffer_funcs_enabled) {
> >> -        if (((old_mem->mem_type == TTM_PL_SYSTEM &&
> >> -              new_mem->mem_type == TTM_PL_VRAM) ||
> >> -             (old_mem->mem_type == TTM_PL_VRAM &&
> >> -              new_mem->mem_type == TTM_PL_SYSTEM))) {
> >> -            hop->fpfn = 0;
> >> -            hop->lpfn = 0;
> >> -            hop->mem_type = TTM_PL_TT;
> >> -            hop->flags = TTM_PL_FLAG_TEMPORARY;
> >> -            return -EMULTIHOP;
> >> -        }
> >> +    if (adev->mman.buffer_funcs_enabled &&
> >> +        ((old_mem->mem_type == TTM_PL_SYSTEM &&
> >> +          new_mem->mem_type == TTM_PL_VRAM) ||
> >> +         (old_mem->mem_type == TTM_PL_VRAM &&
> >> +          new_mem->mem_type == TTM_PL_SYSTEM))) {
> >> +        hop->fpfn = 0;
> >> +        hop->lpfn = 0;
> >> +        hop->mem_type = TTM_PL_TT;
> >> +        hop->flags = TTM_PL_FLAG_TEMPORARY;
> >> +        return -EMULTIHOP;
> >> +    }
> >>   +    amdgpu_bo_move_notify(bo, evict, new_mem);
> >> +    if (adev->mman.buffer_funcs_enabled)
> >>           r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
> >> -    } else {
> >> +    else
> >>           r = -ENODEV;
> >> -    }
> >>         if (r) {
> >>           /* Check that all memory is CPU accessible */
> >> @@ -545,11 +550,10 @@ static int amdgpu_bo_move(struct 
> >> ttm_buffer_object *bo, bool evict,
> >>               return r;
> >>       }
> >>   -    trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
> >> -out:
> >> -    /* update statistics */
> >> +    /* update statistics after the move */
> >> +    if (evict)
> >> +        atomic64_inc(&adev->num_evictions);
> >>       atomic64_add(bo->base.size, &adev->num_bytes_moved);
> >> -    amdgpu_bo_move_notify(bo, evict);
> >>       return 0;
> >>   }
> >>   @@ -1551,7 +1555,7 @@ static int amdgpu_ttm_access_memory(struct 
> >> ttm_buffer_object *bo,
> >>   static void
> >>   amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
> >>   {
> >> -    amdgpu_bo_move_notify(bo, false);
> >> +    amdgpu_bo_move_notify(bo, false, NULL);
> >>   }
> >>     static struct ttm_device_funcs amdgpu_bo_driver = {
> 
> 
> 


-- 
Oleksandr Natalenko (post-factum)

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: once more fix the call oder in amdgpu_ttm_move()
  2024-03-21 14:37   ` Christian König
  2024-04-05 14:41     ` Oleksandr Natalenko
@ 2024-04-18 16:10     ` Alex Deucher
  2024-04-22 15:05       ` Christian König
  1 sibling, 1 reply; 7+ messages in thread
From: Alex Deucher @ 2024-04-18 16:10 UTC (permalink / raw)
  To: Christian König; +Cc: Tvrtko Ursulin, alexander.deucher, amd-gfx

On Thu, Mar 21, 2024 at 10:37 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 21.03.24 um 15:12 schrieb Tvrtko Ursulin:
> >
> > On 21/03/2024 12:43, Christian König wrote:
> >> This reverts drm/amdgpu: fix ftrace event amdgpu_bo_move always move
> >> on same heap. The basic problem here is that after the move the old
> >> location is simply not available any more.
> >>
> >> Some fixes were suggested, but essentially we should call the move
> >> notification before actually moving things because only this way we have
> >> the correct order for DMA-buf and VM move notifications as well.
> >>
> >> Also rework the statistic handling so that we don't update the eviction
> >> counter before the move.
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> >
> > Don't forget:
> >
> > Fixes: 94aeb4117343 ("drm/amdgpu: fix ftrace event amdgpu_bo_move
> > always move on same heap")
> > Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3171
>
> Ah, thanks. I already wanted to ask if there is any bug report about
> that as well.

Did this ever land?  I don't see it anywhere.

Alex

>
> Regards,
> Christian.
>
> >
> > ;)
> >
> > Regards,
> >
> > Tvrtko
> >
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 15 +++----
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  4 +-
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 48 ++++++++++++----------
> >>   3 files changed, 37 insertions(+), 30 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> index 425cebcc5cbf..eb7d824763b9 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> @@ -1245,19 +1245,20 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo
> >> *bo, void *buffer,
> >>    * amdgpu_bo_move_notify - notification about a memory move
> >>    * @bo: pointer to a buffer object
> >>    * @evict: if this move is evicting the buffer from the graphics
> >> address space
> >> + * @new_mem: new resource for backing the BO
> >>    *
> >>    * Marks the corresponding &amdgpu_bo buffer object as invalid,
> >> also performs
> >>    * bookkeeping.
> >>    * TTM driver callback which is called when ttm moves a buffer.
> >>    */
> >> -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
> >> +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
> >> +               bool evict,
> >> +               struct ttm_resource *new_mem)
> >>   {
> >>       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> >> +    struct ttm_resource *old_mem = bo->resource;
> >>       struct amdgpu_bo *abo;
> >>   -    if (!amdgpu_bo_is_amdgpu_bo(bo))
> >> -        return;
> >> -
> >>       abo = ttm_to_amdgpu_bo(bo);
> >>       amdgpu_vm_bo_invalidate(adev, abo, evict);
> >>   @@ -1267,9 +1268,9 @@ void amdgpu_bo_move_notify(struct
> >> ttm_buffer_object *bo, bool evict)
> >>           bo->resource->mem_type != TTM_PL_SYSTEM)
> >>           dma_buf_move_notify(abo->tbo.base.dma_buf);
> >>   -    /* remember the eviction */
> >> -    if (evict)
> >> -        atomic64_inc(&adev->num_evictions);
> >> +    /* move_notify is called before move happens */
> >> +    trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1,
> >> +                 old_mem ? old_mem->mem_type : -1);
> >>   }
> >>     void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >> index a3ea8a82db23..d28e21baef16 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >> @@ -344,7 +344,9 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo,
> >> void *metadata,
> >>   int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
> >>                  size_t buffer_size, uint32_t *metadata_size,
> >>                  uint64_t *flags);
> >> -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict);
> >> +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
> >> +               bool evict,
> >> +               struct ttm_resource *new_mem);
> >>   void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
> >>   vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object
> >> *bo);
> >>   void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> index a5ceec7820cf..460b23918bfc 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> @@ -471,14 +471,16 @@ static int amdgpu_bo_move(struct
> >> ttm_buffer_object *bo, bool evict,
> >>         if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
> >>                bo->ttm == NULL)) {
> >> +        amdgpu_bo_move_notify(bo, evict, new_mem);
> >>           ttm_bo_move_null(bo, new_mem);
> >> -        goto out;
> >> +        return 0;
> >>       }
> >>       if (old_mem->mem_type == TTM_PL_SYSTEM &&
> >>           (new_mem->mem_type == TTM_PL_TT ||
> >>            new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
> >> +        amdgpu_bo_move_notify(bo, evict, new_mem);
> >>           ttm_bo_move_null(bo, new_mem);
> >> -        goto out;
> >> +        return 0;
> >>       }
> >>       if ((old_mem->mem_type == TTM_PL_TT ||
> >>            old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
> >> @@ -488,9 +490,10 @@ static int amdgpu_bo_move(struct
> >> ttm_buffer_object *bo, bool evict,
> >>               return r;
> >>             amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
> >> +        amdgpu_bo_move_notify(bo, evict, new_mem);
> >>           ttm_resource_free(bo, &bo->resource);
> >>           ttm_bo_assign_mem(bo, new_mem);
> >> -        goto out;
> >> +        return 0;
> >>       }
> >>         if (old_mem->mem_type == AMDGPU_PL_GDS ||
> >> @@ -502,8 +505,9 @@ static int amdgpu_bo_move(struct
> >> ttm_buffer_object *bo, bool evict,
> >>           new_mem->mem_type == AMDGPU_PL_OA ||
> >>           new_mem->mem_type == AMDGPU_PL_DOORBELL) {
> >>           /* Nothing to save here */
> >> +        amdgpu_bo_move_notify(bo, evict, new_mem);
> >>           ttm_bo_move_null(bo, new_mem);
> >> -        goto out;
> >> +        return 0;
> >>       }
> >>         if (bo->type == ttm_bo_type_device &&
> >> @@ -515,22 +519,23 @@ static int amdgpu_bo_move(struct
> >> ttm_buffer_object *bo, bool evict,
> >>           abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> >>       }
> >>   -    if (adev->mman.buffer_funcs_enabled) {
> >> -        if (((old_mem->mem_type == TTM_PL_SYSTEM &&
> >> -              new_mem->mem_type == TTM_PL_VRAM) ||
> >> -             (old_mem->mem_type == TTM_PL_VRAM &&
> >> -              new_mem->mem_type == TTM_PL_SYSTEM))) {
> >> -            hop->fpfn = 0;
> >> -            hop->lpfn = 0;
> >> -            hop->mem_type = TTM_PL_TT;
> >> -            hop->flags = TTM_PL_FLAG_TEMPORARY;
> >> -            return -EMULTIHOP;
> >> -        }
> >> +    if (adev->mman.buffer_funcs_enabled &&
> >> +        ((old_mem->mem_type == TTM_PL_SYSTEM &&
> >> +          new_mem->mem_type == TTM_PL_VRAM) ||
> >> +         (old_mem->mem_type == TTM_PL_VRAM &&
> >> +          new_mem->mem_type == TTM_PL_SYSTEM))) {
> >> +        hop->fpfn = 0;
> >> +        hop->lpfn = 0;
> >> +        hop->mem_type = TTM_PL_TT;
> >> +        hop->flags = TTM_PL_FLAG_TEMPORARY;
> >> +        return -EMULTIHOP;
> >> +    }
> >>   +    amdgpu_bo_move_notify(bo, evict, new_mem);
> >> +    if (adev->mman.buffer_funcs_enabled)
> >>           r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
> >> -    } else {
> >> +    else
> >>           r = -ENODEV;
> >> -    }
> >>         if (r) {
> >>           /* Check that all memory is CPU accessible */
> >> @@ -545,11 +550,10 @@ static int amdgpu_bo_move(struct
> >> ttm_buffer_object *bo, bool evict,
> >>               return r;
> >>       }
> >>   -    trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
> >> -out:
> >> -    /* update statistics */
> >> +    /* update statistics after the move */
> >> +    if (evict)
> >> +        atomic64_inc(&adev->num_evictions);
> >>       atomic64_add(bo->base.size, &adev->num_bytes_moved);
> >> -    amdgpu_bo_move_notify(bo, evict);
> >>       return 0;
> >>   }
> >>   @@ -1551,7 +1555,7 @@ static int amdgpu_ttm_access_memory(struct
> >> ttm_buffer_object *bo,
> >>   static void
> >>   amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
> >>   {
> >> -    amdgpu_bo_move_notify(bo, false);
> >> +    amdgpu_bo_move_notify(bo, false, NULL);
> >>   }
> >>     static struct ttm_device_funcs amdgpu_bo_driver = {
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: once more fix the call oder in amdgpu_ttm_move()
  2024-04-18 16:10     ` Alex Deucher
@ 2024-04-22 15:05       ` Christian König
  0 siblings, 0 replies; 7+ messages in thread
From: Christian König @ 2024-04-22 15:05 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Tvrtko Ursulin, alexander.deucher, amd-gfx

Am 18.04.24 um 18:10 schrieb Alex Deucher:
> On Thu, Mar 21, 2024 at 10:37 AM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Am 21.03.24 um 15:12 schrieb Tvrtko Ursulin:
>>> On 21/03/2024 12:43, Christian König wrote:
>>>> This reverts drm/amdgpu: fix ftrace event amdgpu_bo_move always move
>>>> on same heap. The basic problem here is that after the move the old
>>>> location is simply not available any more.
>>>>
>>>> Some fixes were suggested, but essentially we should call the move
>>>> notification before actually moving things because only this way we have
>>>> the correct order for DMA-buf and VM move notifications as well.
>>>>
>>>> Also rework the statistic handling so that we don't update the eviction
>>>> counter before the move.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> Don't forget:
>>>
>>> Fixes: 94aeb4117343 ("drm/amdgpu: fix ftrace event amdgpu_bo_move
>>> always move on same heap")
>>> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3171
>> Ah, thanks. I already wanted to ask if there is any bug report about
>> that as well.
> Did this ever land?  I don't see it anywhere.

No, I never found time to actually rebase and push it.

I just did so 10 minutes ago; it should show up in
amd-staging-drm-next soon, unless there is another CI hiccup.

Christian.

>
> Alex
>
>> Regards,
>> Christian.
>>
>>> ;)
>>>
>>> Regards,
>>>
>>> Tvrtko
>>>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 15 +++----
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  4 +-
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 48 ++++++++++++----------
>>>>    3 files changed, 37 insertions(+), 30 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> index 425cebcc5cbf..eb7d824763b9 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> @@ -1245,19 +1245,20 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo
>>>> *bo, void *buffer,
>>>>     * amdgpu_bo_move_notify - notification about a memory move
>>>>     * @bo: pointer to a buffer object
>>>>     * @evict: if this move is evicting the buffer from the graphics
>>>> address space
>>>> + * @new_mem: new resource for backing the BO
>>>>     *
>>>>     * Marks the corresponding &amdgpu_bo buffer object as invalid,
>>>> also performs
>>>>     * bookkeeping.
>>>>     * TTM driver callback which is called when ttm moves a buffer.
>>>>     */
>>>> -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
>>>> +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
>>>> +               bool evict,
>>>> +               struct ttm_resource *new_mem)
>>>>    {
>>>>        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
>>>> +    struct ttm_resource *old_mem = bo->resource;
>>>>        struct amdgpu_bo *abo;
>>>>    -    if (!amdgpu_bo_is_amdgpu_bo(bo))
>>>> -        return;
>>>> -
>>>>        abo = ttm_to_amdgpu_bo(bo);
>>>>        amdgpu_vm_bo_invalidate(adev, abo, evict);
>>>>    @@ -1267,9 +1268,9 @@ void amdgpu_bo_move_notify(struct
>>>> ttm_buffer_object *bo, bool evict)
>>>>            bo->resource->mem_type != TTM_PL_SYSTEM)
>>>>            dma_buf_move_notify(abo->tbo.base.dma_buf);
>>>>    -    /* remember the eviction */
>>>> -    if (evict)
>>>> -        atomic64_inc(&adev->num_evictions);
>>>> +    /* move_notify is called before move happens */
>>>> +    trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1,
>>>> +                 old_mem ? old_mem->mem_type : -1);
>>>>    }
>>>>      void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> index a3ea8a82db23..d28e21baef16 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> @@ -344,7 +344,9 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo,
>>>> void *metadata,
>>>>    int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
>>>>                   size_t buffer_size, uint32_t *metadata_size,
>>>>                   uint64_t *flags);
>>>> -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict);
>>>> +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
>>>> +               bool evict,
>>>> +               struct ttm_resource *new_mem);
>>>>    void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
>>>>    vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object
>>>> *bo);
>>>>    void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> index a5ceec7820cf..460b23918bfc 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> @@ -471,14 +471,16 @@ static int amdgpu_bo_move(struct
>>>> ttm_buffer_object *bo, bool evict,
>>>>          if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
>>>>                 bo->ttm == NULL)) {
>>>> +        amdgpu_bo_move_notify(bo, evict, new_mem);
>>>>            ttm_bo_move_null(bo, new_mem);
>>>> -        goto out;
>>>> +        return 0;
>>>>        }
>>>>        if (old_mem->mem_type == TTM_PL_SYSTEM &&
>>>>            (new_mem->mem_type == TTM_PL_TT ||
>>>>             new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
>>>> +        amdgpu_bo_move_notify(bo, evict, new_mem);
>>>>            ttm_bo_move_null(bo, new_mem);
>>>> -        goto out;
>>>> +        return 0;
>>>>        }
>>>>        if ((old_mem->mem_type == TTM_PL_TT ||
>>>>             old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
>>>> @@ -488,9 +490,10 @@ static int amdgpu_bo_move(struct
>>>> ttm_buffer_object *bo, bool evict,
>>>>                return r;
>>>>              amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
>>>> +        amdgpu_bo_move_notify(bo, evict, new_mem);
>>>>            ttm_resource_free(bo, &bo->resource);
>>>>            ttm_bo_assign_mem(bo, new_mem);
>>>> -        goto out;
>>>> +        return 0;
>>>>        }
>>>>          if (old_mem->mem_type == AMDGPU_PL_GDS ||
>>>> @@ -502,8 +505,9 @@ static int amdgpu_bo_move(struct
>>>> ttm_buffer_object *bo, bool evict,
>>>>            new_mem->mem_type == AMDGPU_PL_OA ||
>>>>            new_mem->mem_type == AMDGPU_PL_DOORBELL) {
>>>>            /* Nothing to save here */
>>>> +        amdgpu_bo_move_notify(bo, evict, new_mem);
>>>>            ttm_bo_move_null(bo, new_mem);
>>>> -        goto out;
>>>> +        return 0;
>>>>        }
>>>>          if (bo->type == ttm_bo_type_device &&
>>>> @@ -515,22 +519,23 @@ static int amdgpu_bo_move(struct
>>>> ttm_buffer_object *bo, bool evict,
>>>>            abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>>>>        }
>>>>    -    if (adev->mman.buffer_funcs_enabled) {
>>>> -        if (((old_mem->mem_type == TTM_PL_SYSTEM &&
>>>> -              new_mem->mem_type == TTM_PL_VRAM) ||
>>>> -             (old_mem->mem_type == TTM_PL_VRAM &&
>>>> -              new_mem->mem_type == TTM_PL_SYSTEM))) {
>>>> -            hop->fpfn = 0;
>>>> -            hop->lpfn = 0;
>>>> -            hop->mem_type = TTM_PL_TT;
>>>> -            hop->flags = TTM_PL_FLAG_TEMPORARY;
>>>> -            return -EMULTIHOP;
>>>> -        }
>>>> +    if (adev->mman.buffer_funcs_enabled &&
>>>> +        ((old_mem->mem_type == TTM_PL_SYSTEM &&
>>>> +          new_mem->mem_type == TTM_PL_VRAM) ||
>>>> +         (old_mem->mem_type == TTM_PL_VRAM &&
>>>> +          new_mem->mem_type == TTM_PL_SYSTEM))) {
>>>> +        hop->fpfn = 0;
>>>> +        hop->lpfn = 0;
>>>> +        hop->mem_type = TTM_PL_TT;
>>>> +        hop->flags = TTM_PL_FLAG_TEMPORARY;
>>>> +        return -EMULTIHOP;
>>>> +    }
>>>>    +    amdgpu_bo_move_notify(bo, evict, new_mem);
>>>> +    if (adev->mman.buffer_funcs_enabled)
>>>>            r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
>>>> -    } else {
>>>> +    else
>>>>            r = -ENODEV;
>>>> -    }
>>>>          if (r) {
>>>>            /* Check that all memory is CPU accessible */
>>>> @@ -545,11 +550,10 @@ static int amdgpu_bo_move(struct
>>>> ttm_buffer_object *bo, bool evict,
>>>>                return r;
>>>>        }
>>>>    -    trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
>>>> -out:
>>>> -    /* update statistics */
>>>> +    /* update statistics after the move */
>>>> +    if (evict)
>>>> +        atomic64_inc(&adev->num_evictions);
>>>>        atomic64_add(bo->base.size, &adev->num_bytes_moved);
>>>> -    amdgpu_bo_move_notify(bo, evict);
>>>>        return 0;
>>>>    }
>>>>    @@ -1551,7 +1555,7 @@ static int amdgpu_ttm_access_memory(struct
>>>> ttm_buffer_object *bo,
>>>>    static void
>>>>    amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
>>>>    {
>>>> -    amdgpu_bo_move_notify(bo, false);
>>>> +    amdgpu_bo_move_notify(bo, false, NULL);
>>>>    }
>>>>      static struct ttm_device_funcs amdgpu_bo_driver = {


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-04-22 15:05 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-03-21 12:43 [PATCH] drm/amdgpu: once more fix the call oder in amdgpu_ttm_move() Christian König
2024-03-21 14:12 ` Tvrtko Ursulin
2024-03-21 14:37   ` Christian König
2024-04-05 14:41     ` Oleksandr Natalenko
2024-04-18 16:10     ` Alex Deucher
2024-04-22 15:05       ` Christian König
2024-03-21 21:01 ` Alex Deucher

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.