public inbox for amd-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu: move size calculations to the front of the file again
@ 2018-08-31 13:27 Christian König
       [not found] ` <20180831132741.112280-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2018-08-31 13:27 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

amdgpu_vm_bo_* functions should come much later.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 90 +++++++++++++++++-----------------
 1 file changed, 45 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d59222fb5931..a9275a99d793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -133,51 +133,6 @@ struct amdgpu_prt_cb {
 	struct dma_fence_cb cb;
 };
 
-/**
- * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
- *
- * @base: base structure for tracking BO usage in a VM
- * @vm: vm to which bo is to be added
- * @bo: amdgpu buffer object
- *
- * Initialize a bo_va_base structure and add it to the appropriate lists
- *
- */
-static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
-				   struct amdgpu_vm *vm,
-				   struct amdgpu_bo *bo)
-{
-	base->vm = vm;
-	base->bo = bo;
-	INIT_LIST_HEAD(&base->bo_list);
-	INIT_LIST_HEAD(&base->vm_status);
-
-	if (!bo)
-		return;
-	list_add_tail(&base->bo_list, &bo->va);
-
-	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
-		return;
-
-	vm->bulk_moveable = false;
-	if (bo->tbo.type == ttm_bo_type_kernel)
-		list_move(&base->vm_status, &vm->relocated);
-	else
-		list_move(&base->vm_status, &vm->idle);
-
-	if (bo->preferred_domains &
-	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
-		return;
-
-	/*
-	 * we checked all the prerequisites, but it looks like this per vm bo
-	 * is currently evicted. add the bo to the evicted list to make sure it
-	 * is validated on next vm use to avoid fault.
-	 * */
-	list_move_tail(&base->vm_status, &vm->evicted);
-	base->moved = true;
-}
-
 /**
  * amdgpu_vm_level_shift - return the addr shift for each level
  *
@@ -249,6 +204,51 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
 	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
 }
 
+/**
+ * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
+ *
+ * @base: base structure for tracking BO usage in a VM
+ * @vm: vm to which bo is to be added
+ * @bo: amdgpu buffer object
+ *
+ * Initialize a bo_va_base structure and add it to the appropriate lists
+ *
+ */
+static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+				   struct amdgpu_vm *vm,
+				   struct amdgpu_bo *bo)
+{
+	base->vm = vm;
+	base->bo = bo;
+	INIT_LIST_HEAD(&base->bo_list);
+	INIT_LIST_HEAD(&base->vm_status);
+
+	if (!bo)
+		return;
+	list_add_tail(&base->bo_list, &bo->va);
+
+	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
+		return;
+
+	vm->bulk_moveable = false;
+	if (bo->tbo.type == ttm_bo_type_kernel)
+		list_move(&base->vm_status, &vm->relocated);
+	else
+		list_move(&base->vm_status, &vm->idle);
+
+	if (bo->preferred_domains &
+	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
+		return;
+
+	/*
+	 * we checked all the prerequisites, but it looks like this per vm bo
+	 * is currently evicted. add the bo to the evicted list to make sure it
+	 * is validated on next vm use to avoid fault.
+	 * */
+	list_move_tail(&base->vm_status, &vm->evicted);
+	base->moved = true;
+}
+
 /**
  * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
  *
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/2] drm/amdgpu: improve VM state machine documentation
       [not found] ` <20180831132741.112280-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-08-31 13:27   ` Christian König
       [not found]     ` <20180831132741.112280-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-09-03  2:52   ` [PATCH 1/2] drm/amdgpu: move size calculations to the front of the file again Zhang, Jerry (Junwei)
  1 sibling, 1 reply; 7+ messages in thread
From: Christian König @ 2018-08-31 13:27 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Since we have a lot of FAQ on the VM state machine try to improve the
documentation by adding functions for each state move.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 107 ++++++++++++++++++++++++---------
 1 file changed, 79 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a9275a99d793..40c22635fefd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -204,6 +204,69 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
 	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
 }
 
+/**
+ * amdgpu_vm_bo_evicted - vm_bo is evicted
+ *
+ * @vm_bo: vm_bo which is evicted
+ *
+ * State for PDs/PTs and per VM BOs which are not at the location they should
+ * be.
+ */
+static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
+{
+	struct amdgpu_vm *vm = vm_bo->vm;
+	struct amdgpu_bo *bo = vm_bo->bo;
+
+	vm_bo->moved = true;
+	if (bo->tbo.type == ttm_bo_type_kernel)
+		list_move(&vm_bo->vm_status, &vm->evicted);
+	else
+		list_move_tail(&vm_bo->vm_status, &vm->evicted);
+}
+
+/**
+ * amdgpu_vm_bo_relocated - vm_bo is reloacted
+ *
+ * @vm_bo: vm_bo which is relocated
+ *
+ * State for PDs/PTs which needs to update their parent PD.
+ */
+static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
+{
+	list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
+}
+
+/**
+ * amdgpu_vm_bo_moved - vm_bo is moved
+ *
+ * @vm_bo: vm_bo which is moved
+ *
+ * State for per VM and normal BOs which are moved, but that change is not yet
+ * reflected in the page tables.
+ */
+static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
+{
+	struct amdgpu_vm *vm = vm_bo->vm;
+
+	spin_lock(&vm->moved_lock);
+	list_move(&vm_bo->vm_status, &vm->moved);
+	spin_unlock(&vm->moved_lock);
+}
+
+/**
+ * amdgpu_vm_bo_idle - vm_bo is idle
+ *
+ * @vm_bo: vm_bo which is now idle
+ *
+ * State for PDs/PTs and per VM BOs which have gone through the state machine
+ * and are now idle.
+ */
+static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
+{
+	list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
+	vm_bo->moved = false;
+}
+
 /**
  * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
  *
@@ -232,9 +295,9 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
 
 	vm->bulk_moveable = false;
 	if (bo->tbo.type == ttm_bo_type_kernel)
-		list_move(&base->vm_status, &vm->relocated);
+		amdgpu_vm_bo_relocated(base);
 	else
-		list_move(&base->vm_status, &vm->idle);
+		amdgpu_vm_bo_idle(base);
 
 	if (bo->preferred_domains &
 	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
@@ -245,8 +308,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
 	 * is currently evicted. add the bo to the evicted list to make sure it
 	 * is validated on next vm use to avoid fault.
 	 * */
-	list_move_tail(&base->vm_status, &vm->evicted);
-	base->moved = true;
+	amdgpu_vm_bo_evicted(base);
 }
 
 /**
@@ -342,9 +404,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			break;
 
 		if (bo->tbo.type != ttm_bo_type_kernel) {
-			spin_lock(&vm->moved_lock);
-			list_move(&bo_base->vm_status, &vm->moved);
-			spin_unlock(&vm->moved_lock);
+			amdgpu_vm_bo_moved(bo_base);
 		} else {
 			if (vm->use_cpu_for_update)
 				r = amdgpu_bo_kmap(bo, NULL);
@@ -352,7 +412,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 				r = amdgpu_ttm_alloc_gart(&bo->tbo);
 			if (r)
 				break;
-			list_move(&bo_base->vm_status, &vm->relocated);
+			amdgpu_vm_bo_relocated(bo_base);
 		}
 	}
 
@@ -1123,8 +1183,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 		bo_base = list_first_entry(&vm->relocated,
 					   struct amdgpu_vm_bo_base,
 					   vm_status);
-		bo_base->moved = false;
-		list_move(&bo_base->vm_status, &vm->idle);
+		amdgpu_vm_bo_idle(bo_base);
 
 		bo = bo_base->bo->parent;
 		if (!bo)
@@ -1243,7 +1302,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 		if (entry->huge) {
 			/* Add the entry to the relocated list to update it. */
 			entry->huge = false;
-			list_move(&entry->base.vm_status, &p->vm->relocated);
+			amdgpu_vm_bo_relocated(&entry->base);
 		}
 		return;
 	}
@@ -1746,9 +1805,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 		uint32_t mem_type = bo->tbo.mem.mem_type;
 
 		if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
-			list_add_tail(&bo_va->base.vm_status, &vm->evicted);
+			amdgpu_vm_bo_evicted(&bo_va->base);
 		else
-			list_add(&bo_va->base.vm_status, &vm->idle);
+			amdgpu_vm_bo_idle(&bo_va->base);
 	}
 
 	list_splice_init(&bo_va->invalids, &bo_va->valids);
@@ -2472,28 +2531,20 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 
 	list_for_each_entry(bo_base, &bo->va, bo_list) {
 		struct amdgpu_vm *vm = bo_base->vm;
-		bool was_moved = bo_base->moved;
 
-		bo_base->moved = true;
 		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
-			if (bo->tbo.type == ttm_bo_type_kernel)
-				list_move(&bo_base->vm_status, &vm->evicted);
-			else
-				list_move_tail(&bo_base->vm_status,
-					       &vm->evicted);
+			amdgpu_vm_bo_evicted(bo_base);
 			continue;
 		}
 
-		if (was_moved)
+		if (bo_base->moved)
 			continue;
 
-		if (bo->tbo.type == ttm_bo_type_kernel) {
-			list_move(&bo_base->vm_status, &vm->relocated);
-		} else {
-			spin_lock(&bo_base->vm->moved_lock);
-			list_move(&bo_base->vm_status, &vm->moved);
-			spin_unlock(&bo_base->vm->moved_lock);
-		}
+		bo_base->moved = true;
+		if (bo->tbo.type == ttm_bo_type_kernel)
+			amdgpu_vm_bo_relocated(bo_base);
+		else
+			amdgpu_vm_bo_moved(bo_base);
 	}
 }
 
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: improve VM state machine documentation
       [not found]     ` <20180831132741.112280-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-08-31 23:51       ` Felix Kuehling
       [not found]         ` <2ac28ad5-ccd3-51f0-f4da-3fce40a9f20a-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 7+ messages in thread
From: Felix Kuehling @ 2018-08-31 23:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Christian König

Thanks for this. A few comments and a question inline.

On 2018-08-31 09:27 AM, Christian König wrote:
> Since we have a lot of FAQ on the VM state machine try to improve the
> documentation by adding functions for each state move.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 107 ++++++++++++++++++++++++---------
>  1 file changed, 79 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index a9275a99d793..40c22635fefd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -204,6 +204,69 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
>  	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
>  }
>  
> +/**
> + * amdgpu_vm_bo_evicted - vm_bo is evicted
> + *
> + * @vm_bo: vm_bo which is evicted
> + *
> + * State for PDs/PTs and per VM BOs which are not at the location they should
> + * be.
> + */
> +static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
> +{
> +	struct amdgpu_vm *vm = vm_bo->vm;
> +	struct amdgpu_bo *bo = vm_bo->bo;
> +
> +	vm_bo->moved = true;
> +	if (bo->tbo.type == ttm_bo_type_kernel)
> +		list_move(&vm_bo->vm_status, &vm->evicted);
> +	else
> +		list_move_tail(&vm_bo->vm_status, &vm->evicted);
> +}
> +
> +/**
> + * amdgpu_vm_bo_relocated - vm_bo is reloacted
> + *
> + * @vm_bo: vm_bo which is relocated
> + *
> + * State for PDs/PTs which needs to update their parent PD.
> + */
> +static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
> +{
> +	list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
> +}
> +
> +/**
> + * amdgpu_vm_bo_moved - vm_bo is moved
> + *
> + * @vm_bo: vm_bo which is moved
> + *
> + * State for per VM and normal BOs which are moved, but that change is not yet
> + * reflected in the page tables.

I have a question here. Why does amdgpu_cs_vm_handling call
amdgpu_vm_bo_update manually for its BO list entries? Wouldn't it be
enough to just call amdgpu_vm_handle_moved?

> + */
> +static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
> +{
> +	struct amdgpu_vm *vm = vm_bo->vm;
> +
> +	spin_lock(&vm->moved_lock);
> +	list_move(&vm_bo->vm_status, &vm->moved);
> +	spin_unlock(&vm->moved_lock);

If vm->moved_lock protects the moved list, do we also need to take it
whenever something is moved from that list? That could potentially be
any list_move operation that uses vm_bo->vm_status. I found one case
below where that may not be handled correctly.

> +}
> +
> +/**
> + * amdgpu_vm_bo_idle - vm_bo is idle
> + *
> + * @vm_bo: vm_bo which is now idle
> + *
> + * State for PDs/PTs and per VM BOs which have gone through the state machine
> + * and are now idle.
> + */
> +static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
> +{
> +	list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
> +	vm_bo->moved = false;
> +}
> +
>  /**
>   * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
>   *
> @@ -232,9 +295,9 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>  
>  	vm->bulk_moveable = false;
>  	if (bo->tbo.type == ttm_bo_type_kernel)
> -		list_move(&base->vm_status, &vm->relocated);
> +		amdgpu_vm_bo_relocated(base);
>  	else
> -		list_move(&base->vm_status, &vm->idle);
> +		amdgpu_vm_bo_idle(base);
>  
>  	if (bo->preferred_domains &
>  	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
> @@ -245,8 +308,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>  	 * is currently evicted. add the bo to the evicted list to make sure it
>  	 * is validated on next vm use to avoid fault.
>  	 * */
> -	list_move_tail(&base->vm_status, &vm->evicted);
> -	base->moved = true;
> +	amdgpu_vm_bo_evicted(base);
>  }
>  
>  /**
> @@ -342,9 +404,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  			break;
>  
>  		if (bo->tbo.type != ttm_bo_type_kernel) {
> -			spin_lock(&vm->moved_lock);
> -			list_move(&bo_base->vm_status, &vm->moved);
> -			spin_unlock(&vm->moved_lock);
> +			amdgpu_vm_bo_moved(bo_base);
>  		} else {
>  			if (vm->use_cpu_for_update)
>  				r = amdgpu_bo_kmap(bo, NULL);
> @@ -352,7 +412,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  				r = amdgpu_ttm_alloc_gart(&bo->tbo);
>  			if (r)
>  				break;
> -			list_move(&bo_base->vm_status, &vm->relocated);
> +			amdgpu_vm_bo_relocated(bo_base);
>  		}
>  	}
>  
> @@ -1123,8 +1183,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>  		bo_base = list_first_entry(&vm->relocated,
>  					   struct amdgpu_vm_bo_base,
>  					   vm_status);
> -		bo_base->moved = false;
> -		list_move(&bo_base->vm_status, &vm->idle);
> +		amdgpu_vm_bo_idle(bo_base);
>  
>  		bo = bo_base->bo->parent;
>  		if (!bo)
> @@ -1243,7 +1302,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
>  		if (entry->huge) {
>  			/* Add the entry to the relocated list to update it. */
>  			entry->huge = false;
> -			list_move(&entry->base.vm_status, &p->vm->relocated);
> +			amdgpu_vm_bo_relocated(&entry->base);
>  		}
>  		return;
>  	}
> @@ -1746,9 +1805,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>  		uint32_t mem_type = bo->tbo.mem.mem_type;
>  
>  		if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
> -			list_add_tail(&bo_va->base.vm_status, &vm->evicted);
> +			amdgpu_vm_bo_evicted(&bo_va->base);
>  		else
> -			list_add(&bo_va->base.vm_status, &vm->idle);
> +			amdgpu_vm_bo_idle(&bo_va->base);

There is a small change in behaviour here for clearing
bo_va->base.moved. Not sure if it matters.

>  	}
>  
>  	list_splice_init(&bo_va->invalids, &bo_va->valids);
> @@ -2472,28 +2531,20 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>  
>  	list_for_each_entry(bo_base, &bo->va, bo_list) {
>  		struct amdgpu_vm *vm = bo_base->vm;
> -		bool was_moved = bo_base->moved;
>  
> -		bo_base->moved = true;
>  		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
> -			if (bo->tbo.type == ttm_bo_type_kernel)
> -				list_move(&bo_base->vm_status, &vm->evicted);
> -			else
> -				list_move_tail(&bo_base->vm_status,
> -					       &vm->evicted);
> +			amdgpu_vm_bo_evicted(bo_base);

I think here it's possible that the BO was on the moved list. I think
that means amdgpu_vm_bo_evicted should take the moved_lock just in case.

Regards,
  Felix

>  			continue;
>  		}
>  
> -		if (was_moved)
> +		if (bo_base->moved)
>  			continue;
>  
> -		if (bo->tbo.type == ttm_bo_type_kernel) {
> -			list_move(&bo_base->vm_status, &vm->relocated);
> -		} else {
> -			spin_lock(&bo_base->vm->moved_lock);
> -			list_move(&bo_base->vm_status, &vm->moved);
> -			spin_unlock(&bo_base->vm->moved_lock);
> -		}
> +		bo_base->moved = true;
> +		if (bo->tbo.type == ttm_bo_type_kernel)
> +			amdgpu_vm_bo_relocated(bo_base);
> +		else
> +			amdgpu_vm_bo_moved(bo_base);
>  	}
>  }
>  

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: improve VM state machine documentation
       [not found]         ` <2ac28ad5-ccd3-51f0-f4da-3fce40a9f20a-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-01  8:16           ` Christian König
       [not found]             ` <5424812b-2efb-4643-114d-a4609a0c69a3-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2018-09-01  8:16 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 01.09.2018 um 01:51 schrieb Felix Kuehling:
> Thanks for this. A few comments and a question inline.
>
> On 2018-08-31 09:27 AM, Christian König wrote:
>> Since we have a lot of FAQ on the VM state machine try to improve the
>> documentation by adding functions for each state move.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 107 ++++++++++++++++++++++++---------
>>   1 file changed, 79 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index a9275a99d793..40c22635fefd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -204,6 +204,69 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
>>   	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
>>   }
>>   
>> +/**
>> + * amdgpu_vm_bo_evicted - vm_bo is evicted
>> + *
>> + * @vm_bo: vm_bo which is evicted
>> + *
>> + * State for PDs/PTs and per VM BOs which are not at the location they should
>> + * be.
>> + */
>> +static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
>> +{
>> +	struct amdgpu_vm *vm = vm_bo->vm;
>> +	struct amdgpu_bo *bo = vm_bo->bo;
>> +
>> +	vm_bo->moved = true;
>> +	if (bo->tbo.type == ttm_bo_type_kernel)
>> +		list_move(&vm_bo->vm_status, &vm->evicted);
>> +	else
>> +		list_move_tail(&vm_bo->vm_status, &vm->evicted);
>> +}
>> +
>> +/**
>> + * amdgpu_vm_bo_relocated - vm_bo is reloacted
>> + *
>> + * @vm_bo: vm_bo which is relocated
>> + *
>> + * State for PDs/PTs which needs to update their parent PD.
>> + */
>> +static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
>> +{
>> +	list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
>> +}
>> +
>> +/**
>> + * amdgpu_vm_bo_moved - vm_bo is moved
>> + *
>> + * @vm_bo: vm_bo which is moved
>> + *
>> + * State for per VM and normal BOs which are moved, but that change is not yet
>> + * reflected in the page tables.
> I have a question here. Why does amdgpu_cs_vm_handling call
> amdgpu_vm_bo_update manually for its BO list entries? Wouldn't it be
> enough to just call amdgpu_vm_handle_moved?

No, it is still possible that the mapping of the BO was never updated 
because the VM page tables where evicted.

We still need to make sure that all BOs of the current submission are 
updated correctly.

>
>> + */
>> +static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
>> +{
>> +	struct amdgpu_vm *vm = vm_bo->vm;
>> +
>> +	spin_lock(&vm->moved_lock);
>> +	list_move(&vm_bo->vm_status, &vm->moved);
>> +	spin_unlock(&vm->moved_lock);
> If vm->moved_lock protects the moved list, do we also need to take it
> whenever something is moved from that list?

Correct, yes.

> That could potentially be
> any list_move operation that uses vm_bo->vm_status. I found one case
> below where that may not be handled correctly.
>
>> +}
>> +
>> +/**
>> + * amdgpu_vm_bo_idle - vm_bo is idle
>> + *
>> + * @vm_bo: vm_bo which is now idle
>> + *
>> + * State for PDs/PTs and per VM BOs which have gone through the state machine
>> + * and are now idle.
>> + */
>> +static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
>> +{
>> +	list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
>> +	vm_bo->moved = false;
>> +}
>> +
>>   /**
>>    * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
>>    *
>> @@ -232,9 +295,9 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>   
>>   	vm->bulk_moveable = false;
>>   	if (bo->tbo.type == ttm_bo_type_kernel)
>> -		list_move(&base->vm_status, &vm->relocated);
>> +		amdgpu_vm_bo_relocated(base);
>>   	else
>> -		list_move(&base->vm_status, &vm->idle);
>> +		amdgpu_vm_bo_idle(base);
>>   
>>   	if (bo->preferred_domains &
>>   	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
>> @@ -245,8 +308,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>   	 * is currently evicted. add the bo to the evicted list to make sure it
>>   	 * is validated on next vm use to avoid fault.
>>   	 * */
>> -	list_move_tail(&base->vm_status, &vm->evicted);
>> -	base->moved = true;
>> +	amdgpu_vm_bo_evicted(base);
>>   }
>>   
>>   /**
>> @@ -342,9 +404,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   			break;
>>   
>>   		if (bo->tbo.type != ttm_bo_type_kernel) {
>> -			spin_lock(&vm->moved_lock);
>> -			list_move(&bo_base->vm_status, &vm->moved);
>> -			spin_unlock(&vm->moved_lock);
>> +			amdgpu_vm_bo_moved(bo_base);
>>   		} else {
>>   			if (vm->use_cpu_for_update)
>>   				r = amdgpu_bo_kmap(bo, NULL);
>> @@ -352,7 +412,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   				r = amdgpu_ttm_alloc_gart(&bo->tbo);
>>   			if (r)
>>   				break;
>> -			list_move(&bo_base->vm_status, &vm->relocated);
>> +			amdgpu_vm_bo_relocated(bo_base);
>>   		}
>>   	}
>>   
>> @@ -1123,8 +1183,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>   		bo_base = list_first_entry(&vm->relocated,
>>   					   struct amdgpu_vm_bo_base,
>>   					   vm_status);
>> -		bo_base->moved = false;
>> -		list_move(&bo_base->vm_status, &vm->idle);
>> +		amdgpu_vm_bo_idle(bo_base);
>>   
>>   		bo = bo_base->bo->parent;
>>   		if (!bo)
>> @@ -1243,7 +1302,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
>>   		if (entry->huge) {
>>   			/* Add the entry to the relocated list to update it. */
>>   			entry->huge = false;
>> -			list_move(&entry->base.vm_status, &p->vm->relocated);
>> +			amdgpu_vm_bo_relocated(&entry->base);
>>   		}
>>   		return;
>>   	}
>> @@ -1746,9 +1805,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>   		uint32_t mem_type = bo->tbo.mem.mem_type;
>>   
>>   		if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
>> -			list_add_tail(&bo_va->base.vm_status, &vm->evicted);
>> +			amdgpu_vm_bo_evicted(&bo_va->base);
>>   		else
>> -			list_add(&bo_va->base.vm_status, &vm->idle);
>> +			amdgpu_vm_bo_idle(&bo_va->base);
> There is a small change in behaviour here for clearing
> bo_va->base.moved. Not sure if it matters.

Using list_add is minimal more efficient.

>
>>   	}
>>   
>>   	list_splice_init(&bo_va->invalids, &bo_va->valids);
>> @@ -2472,28 +2531,20 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>   
>>   	list_for_each_entry(bo_base, &bo->va, bo_list) {
>>   		struct amdgpu_vm *vm = bo_base->vm;
>> -		bool was_moved = bo_base->moved;
>>   
>> -		bo_base->moved = true;
>>   		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>> -			if (bo->tbo.type == ttm_bo_type_kernel)
>> -				list_move(&bo_base->vm_status, &vm->evicted);
>> -			else
>> -				list_move_tail(&bo_base->vm_status,
>> -					       &vm->evicted);
>> +			amdgpu_vm_bo_evicted(bo_base);
> I think here it's possible that the BO was on the moved list. I think
> that means amdgpu_vm_bo_evicted should take the moved_lock just in case.

Good point, probably best if I split up the moved list into per VM BOs 
and all other BOs.

That should make the locking much more clear.

Regards,
Christian.

>
> Regards,
>    Felix
>
>>   			continue;
>>   		}
>>   
>> -		if (was_moved)
>> +		if (bo_base->moved)
>>   			continue;
>>   
>> -		if (bo->tbo.type == ttm_bo_type_kernel) {
>> -			list_move(&bo_base->vm_status, &vm->relocated);
>> -		} else {
>> -			spin_lock(&bo_base->vm->moved_lock);
>> -			list_move(&bo_base->vm_status, &vm->moved);
>> -			spin_unlock(&bo_base->vm->moved_lock);
>> -		}
>> +		bo_base->moved = true;
>> +		if (bo->tbo.type == ttm_bo_type_kernel)
>> +			amdgpu_vm_bo_relocated(bo_base);
>> +		else
>> +			amdgpu_vm_bo_moved(bo_base);
>>   	}
>>   }
>>   

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: improve VM state machine documentation
       [not found]             ` <5424812b-2efb-4643-114d-a4609a0c69a3-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2018-09-01 16:45               ` Kuehling, Felix
       [not found]                 ` <DM5PR12MB170702D12F63415DA2979AEB920E0-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 7+ messages in thread
From: Kuehling, Felix @ 2018-09-01 16:45 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org,
	Koenig, Christian

>>> @@ -1746,9 +1805,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>               uint32_t mem_type = bo->tbo.mem.mem_type;
>>>
>>>               if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
>>> -                    list_add_tail(&bo_va->base.vm_status, &vm->evicted);
>>> +                    amdgpu_vm_bo_evicted(&bo_va->base);
>>>               else
>>> -                    list_add(&bo_va->base.vm_status, &vm->idle);
>>> +                    amdgpu_vm_bo_idle(&bo_va->base);
>> There is a small change in behaviour here for clearing
>> bo_va->base.moved. Not sure if it matters.
>
> Using list_add is minimal more efficient.

What I meant was that the original code would not always set bo_va->base.moved = false (it's done a few lines above this under some conditions, but not always). The new code will always set bo_va->base.moved = false inside amdgpu_vm_bo_idle.

Regards,
  Felix

________________________________________
From: Christian König <ckoenig.leichtzumerken@gmail.com>
Sent: Saturday, September 1, 2018 4:16:17 AM
To: Kuehling, Felix; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amdgpu: improve VM state machine documentation

Am 01.09.2018 um 01:51 schrieb Felix Kuehling:
> Thanks for this. A few comments and a question inline.
>
> On 2018-08-31 09:27 AM, Christian König wrote:
>> Since we have a lot of FAQ on the VM state machine try to improve the
>> documentation by adding functions for each state move.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 107 ++++++++++++++++++++++++---------
>>   1 file changed, 79 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index a9275a99d793..40c22635fefd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -204,6 +204,69 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
>>      return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
>>   }
>>
>> +/**
>> + * amdgpu_vm_bo_evicted - vm_bo is evicted
>> + *
>> + * @vm_bo: vm_bo which is evicted
>> + *
>> + * State for PDs/PTs and per VM BOs which are not at the location they should
>> + * be.
>> + */
>> +static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
>> +{
>> +    struct amdgpu_vm *vm = vm_bo->vm;
>> +    struct amdgpu_bo *bo = vm_bo->bo;
>> +
>> +    vm_bo->moved = true;
>> +    if (bo->tbo.type == ttm_bo_type_kernel)
>> +            list_move(&vm_bo->vm_status, &vm->evicted);
>> +    else
>> +            list_move_tail(&vm_bo->vm_status, &vm->evicted);
>> +}
>> +
>> +/**
>> + * amdgpu_vm_bo_relocated - vm_bo is reloacted
>> + *
>> + * @vm_bo: vm_bo which is relocated
>> + *
>> + * State for PDs/PTs which needs to update their parent PD.
>> + */
>> +static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
>> +{
>> +    list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
>> +}
>> +
>> +/**
>> + * amdgpu_vm_bo_moved - vm_bo is moved
>> + *
>> + * @vm_bo: vm_bo which is moved
>> + *
>> + * State for per VM and normal BOs which are moved, but that change is not yet
>> + * reflected in the page tables.
> I have a question here. Why does amdgpu_cs_vm_handling call
> amdgpu_vm_bo_update manually for its BO list entries? Wouldn't it be
> enough to just call amdgpu_vm_handle_moved?

No, it is still possible that the mapping of the BO was never updated
because the VM page tables where evicted.

We still need to make sure that all BOs of the current submission are
updated correctly.

>
>> + */
>> +static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
>> +{
>> +    struct amdgpu_vm *vm = vm_bo->vm;
>> +
>> +    spin_lock(&vm->moved_lock);
>> +    list_move(&vm_bo->vm_status, &vm->moved);
>> +    spin_unlock(&vm->moved_lock);
> If vm->moved_lock protects the moved list, do we also need to take it
> whenever something is moved from that list?

Correct, yes.

> That could potentially be
> any list_move operation that uses vm_bo->vm_status. I found one case
> below where that may not be handled correctly.
>
>> +}
>> +
>> +/**
>> + * amdgpu_vm_bo_idle - vm_bo is idle
>> + *
>> + * @vm_bo: vm_bo which is now idle
>> + *
>> + * State for PDs/PTs and per VM BOs which have gone through the state machine
>> + * and are now idle.
>> + */
>> +static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
>> +{
>> +    list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
>> +    vm_bo->moved = false;
>> +}
>> +
>>   /**
>>    * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
>>    *
>> @@ -232,9 +295,9 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>
>>      vm->bulk_moveable = false;
>>      if (bo->tbo.type == ttm_bo_type_kernel)
>> -            list_move(&base->vm_status, &vm->relocated);
>> +            amdgpu_vm_bo_relocated(base);
>>      else
>> -            list_move(&base->vm_status, &vm->idle);
>> +            amdgpu_vm_bo_idle(base);
>>
>>      if (bo->preferred_domains &
>>          amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
>> @@ -245,8 +308,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>       * is currently evicted. add the bo to the evicted list to make sure it
>>       * is validated on next vm use to avoid fault.
>>       * */
>> -    list_move_tail(&base->vm_status, &vm->evicted);
>> -    base->moved = true;
>> +    amdgpu_vm_bo_evicted(base);
>>   }
>>
>>   /**
>> @@ -342,9 +404,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>                      break;
>>
>>              if (bo->tbo.type != ttm_bo_type_kernel) {
>> -                    spin_lock(&vm->moved_lock);
>> -                    list_move(&bo_base->vm_status, &vm->moved);
>> -                    spin_unlock(&vm->moved_lock);
>> +                    amdgpu_vm_bo_moved(bo_base);
>>              } else {
>>                      if (vm->use_cpu_for_update)
>>                              r = amdgpu_bo_kmap(bo, NULL);
>> @@ -352,7 +412,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>                              r = amdgpu_ttm_alloc_gart(&bo->tbo);
>>                      if (r)
>>                              break;
>> -                    list_move(&bo_base->vm_status, &vm->relocated);
>> +                    amdgpu_vm_bo_relocated(bo_base);
>>              }
>>      }
>>
>> @@ -1123,8 +1183,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>              bo_base = list_first_entry(&vm->relocated,
>>                                         struct amdgpu_vm_bo_base,
>>                                         vm_status);
>> -            bo_base->moved = false;
>> -            list_move(&bo_base->vm_status, &vm->idle);
>> +            amdgpu_vm_bo_idle(bo_base);
>>
>>              bo = bo_base->bo->parent;
>>              if (!bo)
>> @@ -1243,7 +1302,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
>>              if (entry->huge) {
>>                      /* Add the entry to the relocated list to update it. */
>>                      entry->huge = false;
>> -                    list_move(&entry->base.vm_status, &p->vm->relocated);
>> +                    amdgpu_vm_bo_relocated(&entry->base);
>>              }
>>              return;
>>      }
>> @@ -1746,9 +1805,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>              uint32_t mem_type = bo->tbo.mem.mem_type;
>>
>>              if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
>> -                    list_add_tail(&bo_va->base.vm_status, &vm->evicted);
>> +                    amdgpu_vm_bo_evicted(&bo_va->base);
>>              else
>> -                    list_add(&bo_va->base.vm_status, &vm->idle);
>> +                    amdgpu_vm_bo_idle(&bo_va->base);
> There is a small change in behaviour here for clearing
> bo_va->base.moved. Not sure if it matters.

Using list_add is minimal more efficient.

>
>>      }
>>
>>      list_splice_init(&bo_va->invalids, &bo_va->valids);
>> @@ -2472,28 +2531,20 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>
>>      list_for_each_entry(bo_base, &bo->va, bo_list) {
>>              struct amdgpu_vm *vm = bo_base->vm;
>> -            bool was_moved = bo_base->moved;
>>
>> -            bo_base->moved = true;
>>              if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>> -                    if (bo->tbo.type == ttm_bo_type_kernel)
>> -                            list_move(&bo_base->vm_status, &vm->evicted);
>> -                    else
>> -                            list_move_tail(&bo_base->vm_status,
>> -                                           &vm->evicted);
>> +                    amdgpu_vm_bo_evicted(bo_base);
> I think here it's possible that the BO was on the moved list. I think
> that means amdgpu_vm_bo_evicted should take the moved_lock just in case.

Good point, probably best if I split up the moved list into per VM BOs
and all other BOs.

That should make the locking much more clear.

Regards,
Christian.

>
> Regards,
>    Felix
>
>>                      continue;
>>              }
>>
>> -            if (was_moved)
>> +            if (bo_base->moved)
>>                      continue;
>>
>> -            if (bo->tbo.type == ttm_bo_type_kernel) {
>> -                    list_move(&bo_base->vm_status, &vm->relocated);
>> -            } else {
>> -                    spin_lock(&bo_base->vm->moved_lock);
>> -                    list_move(&bo_base->vm_status, &vm->moved);
>> -                    spin_unlock(&bo_base->vm->moved_lock);
>> -            }
>> +            bo_base->moved = true;
>> +            if (bo->tbo.type == ttm_bo_type_kernel)
>> +                    amdgpu_vm_bo_relocated(bo_base);
>> +            else
>> +                    amdgpu_vm_bo_moved(bo_base);
>>      }
>>   }
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: improve VM state machine documentation
       [not found]                 ` <DM5PR12MB170702D12F63415DA2979AEB920E0-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2018-09-01 18:04                   ` Christian König
  0 siblings, 0 replies; 7+ messages in thread
From: Christian König @ 2018-09-01 18:04 UTC (permalink / raw)
  To: Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org

Am 01.09.2018 um 18:45 schrieb Kuehling, Felix:
>>>> @@ -1746,9 +1805,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>>                uint32_t mem_type = bo->tbo.mem.mem_type;
>>>>
>>>>                if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
>>>> -                    list_add_tail(&bo_va->base.vm_status, &vm->evicted);
>>>> +                    amdgpu_vm_bo_evicted(&bo_va->base);
>>>>                else
>>>> -                    list_add(&bo_va->base.vm_status, &vm->idle);
>>>> +                    amdgpu_vm_bo_idle(&bo_va->base);
>>> There is a small change in behaviour here for clearing
>>> bo_va->base.moved. Not sure if it matters.
>> Using list_add is minimal more efficient.
> What I meant was that the original code would not always set bo_va->base.moved = false (it's done a few lines above this under some conditions, but not always). The new code will always set bo_va->base.moved = false inside amdgpu_vm_bo_idle.

Good point as well, but that is indeed correct.

Regards,
Christian.

>
> Regards,
>    Felix
>
> ________________________________________
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Saturday, September 1, 2018 4:16:17 AM
> To: Kuehling, Felix; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/2] drm/amdgpu: improve VM state machine documentation
>
> Am 01.09.2018 um 01:51 schrieb Felix Kuehling:
>> Thanks for this. A few comments and a question inline.
>>
>> On 2018-08-31 09:27 AM, Christian König wrote:
>>> Since we have a lot of FAQ on the VM state machine try to improve the
>>> documentation by adding functions for each state move.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 107 ++++++++++++++++++++++++---------
>>>    1 file changed, 79 insertions(+), 28 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index a9275a99d793..40c22635fefd 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -204,6 +204,69 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
>>>       return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
>>>    }
>>>
>>> +/**
>>> + * amdgpu_vm_bo_evicted - vm_bo is evicted
>>> + *
>>> + * @vm_bo: vm_bo which is evicted
>>> + *
>>> + * State for PDs/PTs and per VM BOs which are not at the location they should
>>> + * be.
>>> + */
>>> +static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
>>> +{
>>> +    struct amdgpu_vm *vm = vm_bo->vm;
>>> +    struct amdgpu_bo *bo = vm_bo->bo;
>>> +
>>> +    vm_bo->moved = true;
>>> +    if (bo->tbo.type == ttm_bo_type_kernel)
>>> +            list_move(&vm_bo->vm_status, &vm->evicted);
>>> +    else
>>> +            list_move_tail(&vm_bo->vm_status, &vm->evicted);
>>> +}
>>> +
>>> +/**
>>> + * amdgpu_vm_bo_relocated - vm_bo is reloacted
>>> + *
>>> + * @vm_bo: vm_bo which is relocated
>>> + *
>>> + * State for PDs/PTs which needs to update their parent PD.
>>> + */
>>> +static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
>>> +{
>>> +    list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
>>> +}
>>> +
>>> +/**
>>> + * amdgpu_vm_bo_moved - vm_bo is moved
>>> + *
>>> + * @vm_bo: vm_bo which is moved
>>> + *
>>> + * State for per VM and normal BOs which are moved, but that change is not yet
>>> + * reflected in the page tables.
>> I have a question here. Why does amdgpu_cs_vm_handling call
>> amdgpu_vm_bo_update manually for its BO list entries? Wouldn't it be
>> enough to just call amdgpu_vm_handle_moved?
> No, it is still possible that the mapping of the BO was never updated
> because the VM page tables where evicted.
>
> We still need to make sure that all BOs of the current submission are
> updated correctly.
>
>>> + */
>>> +static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
>>> +{
>>> +    struct amdgpu_vm *vm = vm_bo->vm;
>>> +
>>> +    spin_lock(&vm->moved_lock);
>>> +    list_move(&vm_bo->vm_status, &vm->moved);
>>> +    spin_unlock(&vm->moved_lock);
>> If vm->moved_lock protects the moved list, do we also need to take it
>> whenever something is moved from that list?
> Correct, yes.
>
>> That could potentially be
>> any list_move operation that uses vm_bo->vm_status. I found one case
>> below where that may not be handled correctly.
>>
>>> +}
>>> +
>>> +/**
>>> + * amdgpu_vm_bo_idle - vm_bo is idle
>>> + *
>>> + * @vm_bo: vm_bo which is now idle
>>> + *
>>> + * State for PDs/PTs and per VM BOs which have gone through the state machine
>>> + * and are now idle.
>>> + */
>>> +static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
>>> +{
>>> +    list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
>>> +    vm_bo->moved = false;
>>> +}
>>> +
>>>    /**
>>>     * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
>>>     *
>>> @@ -232,9 +295,9 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>>
>>>       vm->bulk_moveable = false;
>>>       if (bo->tbo.type == ttm_bo_type_kernel)
>>> -            list_move(&base->vm_status, &vm->relocated);
>>> +            amdgpu_vm_bo_relocated(base);
>>>       else
>>> -            list_move(&base->vm_status, &vm->idle);
>>> +            amdgpu_vm_bo_idle(base);
>>>
>>>       if (bo->preferred_domains &
>>>           amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
>>> @@ -245,8 +308,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>>        * is currently evicted. add the bo to the evicted list to make sure it
>>>        * is validated on next vm use to avoid fault.
>>>        * */
>>> -    list_move_tail(&base->vm_status, &vm->evicted);
>>> -    base->moved = true;
>>> +    amdgpu_vm_bo_evicted(base);
>>>    }
>>>
>>>    /**
>>> @@ -342,9 +404,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>                       break;
>>>
>>>               if (bo->tbo.type != ttm_bo_type_kernel) {
>>> -                    spin_lock(&vm->moved_lock);
>>> -                    list_move(&bo_base->vm_status, &vm->moved);
>>> -                    spin_unlock(&vm->moved_lock);
>>> +                    amdgpu_vm_bo_moved(bo_base);
>>>               } else {
>>>                       if (vm->use_cpu_for_update)
>>>                               r = amdgpu_bo_kmap(bo, NULL);
>>> @@ -352,7 +412,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>                               r = amdgpu_ttm_alloc_gart(&bo->tbo);
>>>                       if (r)
>>>                               break;
>>> -                    list_move(&bo_base->vm_status, &vm->relocated);
>>> +                    amdgpu_vm_bo_relocated(bo_base);
>>>               }
>>>       }
>>>
>>> @@ -1123,8 +1183,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>>               bo_base = list_first_entry(&vm->relocated,
>>>                                          struct amdgpu_vm_bo_base,
>>>                                          vm_status);
>>> -            bo_base->moved = false;
>>> -            list_move(&bo_base->vm_status, &vm->idle);
>>> +            amdgpu_vm_bo_idle(bo_base);
>>>
>>>               bo = bo_base->bo->parent;
>>>               if (!bo)
>>> @@ -1243,7 +1302,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
>>>               if (entry->huge) {
>>>                       /* Add the entry to the relocated list to update it. */
>>>                       entry->huge = false;
>>> -                    list_move(&entry->base.vm_status, &p->vm->relocated);
>>> +                    amdgpu_vm_bo_relocated(&entry->base);
>>>               }
>>>               return;
>>>       }
>>> @@ -1746,9 +1805,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>               uint32_t mem_type = bo->tbo.mem.mem_type;
>>>
>>>               if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
>>> -                    list_add_tail(&bo_va->base.vm_status, &vm->evicted);
>>> +                    amdgpu_vm_bo_evicted(&bo_va->base);
>>>               else
>>> -                    list_add(&bo_va->base.vm_status, &vm->idle);
>>> +                    amdgpu_vm_bo_idle(&bo_va->base);
>> There is a small change in behaviour here for clearing
>> bo_va->base.moved. Not sure if it matters.
> Using list_add is minimal more efficient.
>
>>>       }
>>>
>>>       list_splice_init(&bo_va->invalids, &bo_va->valids);
>>> @@ -2472,28 +2531,20 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>>
>>>       list_for_each_entry(bo_base, &bo->va, bo_list) {
>>>               struct amdgpu_vm *vm = bo_base->vm;
>>> -            bool was_moved = bo_base->moved;
>>>
>>> -            bo_base->moved = true;
>>>               if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>>> -                    if (bo->tbo.type == ttm_bo_type_kernel)
>>> -                            list_move(&bo_base->vm_status, &vm->evicted);
>>> -                    else
>>> -                            list_move_tail(&bo_base->vm_status,
>>> -                                           &vm->evicted);
>>> +                    amdgpu_vm_bo_evicted(bo_base);
>> I think here it's possible that the BO was on the moved list. I think
>> that means amdgpu_vm_bo_evicted should take the moved_lock just in case.
> Good point, probably best if I split up the moved list into per VM BOs
> and all other BOs.
>
> That should make the locking much more clear.
>
> Regards,
> Christian.
>
>> Regards,
>>     Felix
>>
>>>                       continue;
>>>               }
>>>
>>> -            if (was_moved)
>>> +            if (bo_base->moved)
>>>                       continue;
>>>
>>> -            if (bo->tbo.type == ttm_bo_type_kernel) {
>>> -                    list_move(&bo_base->vm_status, &vm->relocated);
>>> -            } else {
>>> -                    spin_lock(&bo_base->vm->moved_lock);
>>> -                    list_move(&bo_base->vm_status, &vm->moved);
>>> -                    spin_unlock(&bo_base->vm->moved_lock);
>>> -            }
>>> +            bo_base->moved = true;
>>> +            if (bo->tbo.type == ttm_bo_type_kernel)
>>> +                    amdgpu_vm_bo_relocated(bo_base);
>>> +            else
>>> +                    amdgpu_vm_bo_moved(bo_base);
>>>       }
>>>    }
>>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: move size calculations to the front of the file again
       [not found] ` <20180831132741.112280-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-08-31 13:27   ` [PATCH 2/2] drm/amdgpu: improve VM state machine documentation Christian König
@ 2018-09-03  2:52   ` Zhang, Jerry (Junwei)
  1 sibling, 0 replies; 7+ messages in thread
From: Zhang, Jerry (Junwei) @ 2018-09-03  2:52 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 08/31/2018 09:27 PM, Christian König wrote:
> amdgpu_vm_bo_* functions should come much later.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 90 +++++++++++++++++-----------------
>   1 file changed, 45 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index d59222fb5931..a9275a99d793 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -133,51 +133,6 @@ struct amdgpu_prt_cb {
>   	struct dma_fence_cb cb;
>   };
>
> -/**
> - * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
> - *
> - * @base: base structure for tracking BO usage in a VM
> - * @vm: vm to which bo is to be added
> - * @bo: amdgpu buffer object
> - *
> - * Initialize a bo_va_base structure and add it to the appropriate lists
> - *
> - */
> -static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
> -				   struct amdgpu_vm *vm,
> -				   struct amdgpu_bo *bo)
> -{
> -	base->vm = vm;
> -	base->bo = bo;
> -	INIT_LIST_HEAD(&base->bo_list);
> -	INIT_LIST_HEAD(&base->vm_status);
> -
> -	if (!bo)
> -		return;
> -	list_add_tail(&base->bo_list, &bo->va);
> -
> -	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
> -		return;
> -
> -	vm->bulk_moveable = false;
> -	if (bo->tbo.type == ttm_bo_type_kernel)
> -		list_move(&base->vm_status, &vm->relocated);
> -	else
> -		list_move(&base->vm_status, &vm->idle);
> -
> -	if (bo->preferred_domains &
> -	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
> -		return;
> -
> -	/*
> -	 * we checked all the prerequisites, but it looks like this per vm bo
> -	 * is currently evicted. add the bo to the evicted list to make sure it
> -	 * is validated on next vm use to avoid fault.
> -	 * */
> -	list_move_tail(&base->vm_status, &vm->evicted);
> -	base->moved = true;
> -}
> -
>   /**
>    * amdgpu_vm_level_shift - return the addr shift for each level
>    *
> @@ -249,6 +204,51 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
>   	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
>   }
>
> +/**
> + * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
> + *
> + * @base: base structure for tracking BO usage in a VM
> + * @vm: vm to which bo is to be added
> + * @bo: amdgpu buffer object
> + *
> + * Initialize a bo_va_base structure and add it to the appropriate lists
> + *
> + */
> +static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
> +				   struct amdgpu_vm *vm,
> +				   struct amdgpu_bo *bo)
> +{
> +	base->vm = vm;
> +	base->bo = bo;
> +	INIT_LIST_HEAD(&base->bo_list);
> +	INIT_LIST_HEAD(&base->vm_status);
> +
> +	if (!bo)
> +		return;
> +	list_add_tail(&base->bo_list, &bo->va);
> +
> +	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
> +		return;
> +
> +	vm->bulk_moveable = false;
> +	if (bo->tbo.type == ttm_bo_type_kernel)
> +		list_move(&base->vm_status, &vm->relocated);
> +	else
> +		list_move(&base->vm_status, &vm->idle);
> +
> +	if (bo->preferred_domains &
> +	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
> +		return;
> +
> +	/*
> +	 * we checked all the prerequisites, but it looks like this per vm bo
> +	 * is currently evicted. add the bo to the evicted list to make sure it
> +	 * is validated on next vm use to avoid fault.
> +	 * */
> +	list_move_tail(&base->vm_status, &vm->evicted);
> +	base->moved = true;
> +}
> +
>   /**
>    * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
>    *
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-09-03  2:52 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-08-31 13:27 [PATCH 1/2] drm/amdgpu: move size calculations to the front of the file again Christian König
     [not found] ` <20180831132741.112280-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-08-31 13:27   ` [PATCH 2/2] drm/amdgpu: improve VM state machine documentation Christian König
     [not found]     ` <20180831132741.112280-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-08-31 23:51       ` Felix Kuehling
     [not found]         ` <2ac28ad5-ccd3-51f0-f4da-3fce40a9f20a-5C7GfCeVMHo@public.gmane.org>
2018-09-01  8:16           ` Christian König
     [not found]             ` <5424812b-2efb-4643-114d-a4609a0c69a3-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-09-01 16:45               ` Kuehling, Felix
     [not found]                 ` <DM5PR12MB170702D12F63415DA2979AEB920E0-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2018-09-01 18:04                   ` Christian König
2018-09-03  2:52   ` [PATCH 1/2] drm/amdgpu: move size calculations to the front of the file again Zhang, Jerry (Junwei)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox