All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Brost <matthew.brost@intel.com>
To: Francois Dugast <francois.dugast@intel.com>
Cc: intel-xe@lists.freedesktop.org, christian.koenig@amd.com
Subject: Re: [Intel-xe] [PATCH v2 2/2] drm/xe: switch to using drm_exec
Date: Thu, 13 Jul 2023 14:00:14 +0000	[thread overview]
Message-ID: <ZLADblgo7aS6fTNi@DUT025-TGLU> (raw)
In-Reply-To: <20230713131711.7-3-francois.dugast@intel.com>

On Thu, Jul 13, 2023 at 01:17:11PM +0000, Francois Dugast wrote:
> Replace the use of ttm_execbuf_util helpers with the drm_exec helpers.
> 
> v2:
> - Call dma-resv locking functions directly in xe_bo_lock and
>   xe_vm_lock functions (Matthew Brost)
> - Switch to execution context for GEM buffers v7 (Christian König)
> - Use DRM_EXEC_INTERRUPTIBLE_WAIT
> 
> Signed-off-by: Francois Dugast <francois.dugast@intel.com>
> ---
>  drivers/gpu/drm/xe/Kconfig           |   1 +
>  drivers/gpu/drm/xe/xe_bo.c           |  29 +++-
>  drivers/gpu/drm/xe/xe_bo_types.h     |   1 -
>  drivers/gpu/drm/xe/xe_exec.c         |  30 +---
>  drivers/gpu/drm/xe/xe_gt_pagefault.c |  55 +-----
>  drivers/gpu/drm/xe/xe_vm.c           | 246 +++++++++++++--------------
>  drivers/gpu/drm/xe/xe_vm.h           |  32 ++--
>  7 files changed, 168 insertions(+), 226 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
> index d44794f99338..b739faa401d3 100644
> --- a/drivers/gpu/drm/xe/Kconfig
> +++ b/drivers/gpu/drm/xe/Kconfig
> @@ -8,6 +8,7 @@ config DRM_XE
>  	select SHMEM
>  	select TMPFS
>  	select DRM_BUDDY
> +	select DRM_EXEC
>  	select DRM_KMS_HELPER
>  	select DRM_PANEL
>  	select DRM_SUBALLOC_HELPER
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 6353afa8d846..aafe69ed4320 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -1806,17 +1806,32 @@ int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
>  int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww,
>  	       int num_resv, bool intr)
>  {
> -	struct ttm_validate_buffer tv_bo;
> -	LIST_HEAD(objs);
> -	LIST_HEAD(dups);
> +	struct dma_resv *obj;
> +	int err;
>  
>  	XE_BUG_ON(!ww);
>  
> -	tv_bo.num_shared = num_resv;
> -	tv_bo.bo = &bo->ttm;;
> -	list_add_tail(&tv_bo.head, &objs);
> +	obj = bo->ttm.base.resv;
> +	ww_acquire_init(ww, &reservation_ww_class);
> +
> +	if (intr)
> +		err = dma_resv_lock_interruptible(obj, ww);
> +	else
> +		err = dma_resv_lock(obj, ww);
> +
> +	if (unlikely(err))
> +		return err;
> +
> +	num_resv = max(num_resv, 1);

Why? If the user ask to reserve 0 fences this will reserve 1.

> +	err = dma_resv_reserve_fences(obj, num_resv);
> +	if (err)
> +		goto out_err;
>  
> -	return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
> +	return 0;
> +
> +out_err:
> +	dma_resv_unlock(obj);
> +	return err;
>  }
>  
>  void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww)
> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
> index f6ee920303af..bc67263c6713 100644
> --- a/drivers/gpu/drm/xe/xe_bo_types.h
> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> @@ -11,7 +11,6 @@
>  #include <drm/drm_mm.h>
>  #include <drm/ttm/ttm_bo.h>
>  #include <drm/ttm/ttm_device.h>
> -#include <drm/ttm/ttm_execbuf_util.h>
>  #include <drm/ttm/ttm_placement.h>
>  
>  struct xe_device;
> diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> index ba13d20ed348..32a6e16ec177 100644
> --- a/drivers/gpu/drm/xe/xe_exec.c
> +++ b/drivers/gpu/drm/xe/xe_exec.c
> @@ -6,6 +6,7 @@
>  #include "xe_exec.h"
>  
>  #include <drm/drm_device.h>
> +#include <drm/drm_exec.h>
>  #include <drm/drm_file.h>
>  #include <drm/xe_drm.h>
>  #include <linux/delay.h>
> @@ -95,23 +96,18 @@
>  
>  #define XE_EXEC_BIND_RETRY_TIMEOUT_MS 1000
>  
> -static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
> -			 struct ttm_validate_buffer tv_onstack[],
> -			 struct ttm_validate_buffer **tv,
> -			 struct list_head *objs)
> +static int xe_exec_begin(struct xe_engine *e, struct drm_exec *exec)
>  {
>  	struct xe_vm *vm = e->vm;
>  	struct xe_vma *vma;
> -	LIST_HEAD(dups);
>  	ktime_t end = 0;
>  	int err = 0;
>  
> -	*tv = NULL;
>  	if (xe_vm_no_dma_fences(e->vm))
>  		return 0;
>  
>  retry:
> -	err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1);
> +	err = xe_vm_lock_dma_resv(vm, exec, true, 1);
>  	if (err)
>  		return err;
>  
> @@ -128,8 +124,7 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
>  
>  		err = xe_bo_validate(xe_vma_bo(vma), vm, false);
>  		if (err) {
> -			xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs);
> -			*tv = NULL;
> +			xe_vm_unlock_dma_resv(vm, exec);
>  			break;
>  		}
>  	}
> @@ -153,14 +148,10 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
>  	return err;
>  }
>  
> -static void xe_exec_end(struct xe_engine *e,
> -			struct ttm_validate_buffer *tv_onstack,
> -			struct ttm_validate_buffer *tv,
> -			struct ww_acquire_ctx *ww,
> -			struct list_head *objs)
> +static void xe_exec_end(struct xe_engine *e, struct drm_exec *exec)
>  {
>  	if (!xe_vm_no_dma_fences(e->vm))
> -		xe_vm_unlock_dma_resv(e->vm, tv_onstack, tv, ww, objs);
> +		xe_vm_unlock_dma_resv(e->vm, exec);
>  }
>  
>  int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> @@ -173,14 +164,11 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  	struct xe_engine *engine;
>  	struct xe_sync_entry *syncs = NULL;
>  	u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
> -	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
> -	struct ttm_validate_buffer *tv = NULL;
>  	u32 i, num_syncs = 0;
>  	struct xe_sched_job *job;
>  	struct dma_fence *rebind_fence;
>  	struct xe_vm *vm;
> -	struct ww_acquire_ctx ww;
> -	struct list_head objs;
> +	struct drm_exec exec;
>  	bool write_locked;
>  	int err = 0;
>  
> @@ -293,7 +281,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  			goto err_unlock_list;
>  	}
>  
> -	err = xe_exec_begin(engine, &ww, tv_onstack, &tv, &objs);
> +	err = xe_exec_begin(engine, &exec);
>  	if (err)
>  		goto err_unlock_list;
>  
> @@ -412,7 +400,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  	if (err)
>  		xe_sched_job_put(job);
>  err_engine_end:
> -	xe_exec_end(engine, tv_onstack, tv, &ww, &objs);
> +	xe_exec_end(engine, &exec);
>  err_unlock_list:
>  	if (write_locked)
>  		up_write(&vm->lock);
> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> index 125e4744fa38..94f59c29ba9b 100644
> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> @@ -8,8 +8,8 @@
>  #include <linux/bitfield.h>
>  #include <linux/circ_buf.h>
>  
> +#include <drm/drm_exec.h>
>  #include <drm/drm_managed.h>
> -#include <drm/ttm/ttm_execbuf_util.h>
>  
>  #include "xe_bo.h"
>  #include "xe_gt.h"
> @@ -84,11 +84,6 @@ static bool vma_matches(struct xe_vma *vma, u64 page_addr)
>  	return true;
>  }
>  
> -static bool only_needs_bo_lock(struct xe_bo *bo)
> -{
> -	return bo && bo->vm;
> -}
> -
>  static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
>  {
>  	struct xe_vma *vma = NULL;
> @@ -110,10 +105,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
>  	struct xe_vm *vm;
>  	struct xe_vma *vma = NULL;
>  	struct xe_bo *bo;
> -	LIST_HEAD(objs);
> -	LIST_HEAD(dups);
> -	struct ttm_validate_buffer tv_bo, tv_vm;
> -	struct ww_acquire_ctx ww;
> +	struct drm_exec exec;
>  	struct dma_fence *fence;
>  	bool write_locked;
>  	int ret = 0;
> @@ -171,20 +163,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
>  
>  	/* Lock VM and BOs dma-resv */
>  	bo = xe_vma_bo(vma);
> -	if (only_needs_bo_lock(bo)) {
> -		/* This path ensures the BO's LRU is updated */
> -		ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false);
> -	} else {
> -		tv_vm.num_shared = xe->info.tile_count;
> -		tv_vm.bo = xe_vm_ttm_bo(vm);
> -		list_add(&tv_vm.head, &objs);
> -		if (bo) {
> -			tv_bo.bo = &bo->ttm;
> -			tv_bo.num_shared = xe->info.tile_count;
> -			list_add(&tv_bo.head, &objs);
> -		}
> -		ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
> -	}
> +	ret = xe_vm_bo_lock(vm, bo, &exec, xe->info.tile_count, false);
>  	if (ret)
>  		goto unlock_vm;
>  
> @@ -227,10 +206,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
>  	vma->usm.tile_invalidated &= ~BIT(gt_to_tile(gt)->id);
>  
>  unlock_dma_resv:
> -	if (only_needs_bo_lock(bo))
> -		xe_bo_unlock(bo, &ww);
> -	else
> -		ttm_eu_backoff_reservation(&ww, &objs);
> +	xe_vm_bo_unlock(vm, bo, &exec, true);
>  unlock_vm:
>  	if (!ret)
>  		vm->usm.last_fault_vma = vma;
> @@ -498,10 +474,7 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
>  	struct xe_vm *vm;
>  	struct xe_vma *vma;
>  	struct xe_bo *bo;
> -	LIST_HEAD(objs);
> -	LIST_HEAD(dups);
> -	struct ttm_validate_buffer tv_bo, tv_vm;
> -	struct ww_acquire_ctx ww;
> +	struct drm_exec exec;
>  	int ret = 0;
>  
>  	/* We only support ACC_TRIGGER at the moment */
> @@ -534,28 +507,14 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
>  
>  	/* Lock VM and BOs dma-resv */
>  	bo = xe_vma_bo(vma);
> -	if (only_needs_bo_lock(bo)) {
> -		/* This path ensures the BO's LRU is updated */
> -		ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false);
> -	} else {
> -		tv_vm.num_shared = xe->info.tile_count;
> -		tv_vm.bo = xe_vm_ttm_bo(vm);
> -		list_add(&tv_vm.head, &objs);
> -		tv_bo.bo = &bo->ttm;
> -		tv_bo.num_shared = xe->info.tile_count;
> -		list_add(&tv_bo.head, &objs);
> -		ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
> -	}
> +	ret = xe_vm_bo_lock(vm, bo, &exec, xe->info.tile_count, false);
>  	if (ret)
>  		goto unlock_vm;
>  
>  	/* Migrate to VRAM, move should invalidate the VMA first */
>  	ret = xe_bo_migrate(bo, XE_PL_VRAM0 + tile->id);
>  
> -	if (only_needs_bo_lock(bo))
> -		xe_bo_unlock(bo, &ww);
> -	else
> -		ttm_eu_backoff_reservation(&ww, &objs);
> +	xe_vm_bo_unlock(vm, bo, &exec, true);
>  unlock_vm:
>  	up_read(&vm->lock);
>  	xe_vm_put(vm);
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 6c216350084b..c89b10d5feb5 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -7,8 +7,8 @@
>  
>  #include <linux/dma-fence-array.h>
>  
> +#include <drm/drm_exec.h>
>  #include <drm/drm_print.h>
> -#include <drm/ttm/ttm_execbuf_util.h>
>  #include <drm/ttm/ttm_tt.h>
>  #include <drm/xe_drm.h>
>  #include <linux/delay.h>
> @@ -321,11 +321,8 @@ static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
>  
>  int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e)
>  {
> -	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
> -	struct ttm_validate_buffer *tv;
> -	struct ww_acquire_ctx ww;
> -	struct list_head objs;
>  	struct dma_fence *pfence;
> +	struct drm_exec exec;
>  	int err;
>  	bool wait;
>  
> @@ -333,7 +330,7 @@ int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e)
>  
>  	down_write(&vm->lock);
>  
> -	err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, true, 1);
> +	err = xe_vm_lock_dma_resv(vm, &exec, true, 1);
>  	if (err)
>  		goto out_unlock_outer;
>  
> @@ -367,7 +364,7 @@ int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e)
>  	up_read(&vm->userptr.notifier_lock);
>  
>  out_unlock:
> -	xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
> +	xe_vm_unlock_dma_resv(vm, &exec);
>  out_unlock_outer:
>  	up_write(&vm->lock);
>  
> @@ -397,68 +394,43 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
>   * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv
>   * objects of the vm's external buffer objects.
>   * @vm: The vm.
> - * @ww: Pointer to a struct ww_acquire_ctx locking context.
> - * @tv_onstack: Array size XE_ONSTACK_TV of storage for the struct
> - * ttm_validate_buffers used for locking.
> - * @tv: Pointer to a pointer that on output contains the actual storage used.
> - * @objs: List head for the buffer objects locked.
> + * @exec: Pointer to a struct drm_exec execution context.
>   * @intr: Whether to lock interruptible.
>   * @num_shared: Number of dma-fence slots to reserve in the locked objects.
>   *
>   * Locks the vm dma-resv objects and all the dma-resv objects of the
> - * buffer objects on the vm external object list. The TTM utilities require
> - * a list of struct ttm_validate_buffers pointing to the actual buffer
> - * objects to lock. Storage for those struct ttm_validate_buffers should
> - * be provided in @tv_onstack, and is typically reserved on the stack
> - * of the caller. If the size of @tv_onstack isn't sufficient, then
> - * storage will be allocated internally using kvmalloc().
> + * buffer objects on the vm external object list using helpers provided
> + * by drm_exec.
>   *
>   * The function performs deadlock handling internally, and after a
>   * successful return the ww locking transaction should be considered
>   * sealed.
>   *
> - * Return: 0 on success, Negative error code on error. In particular if
> - * @intr is set to true, -EINTR or -ERESTARTSYS may be returned. In case
> - * of error, any locking performed has been reverted.
> + * Return: 0 on success, Negative error code on error.
>   */
> -int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
> -			struct ttm_validate_buffer *tv_onstack,
> -			struct ttm_validate_buffer **tv,
> -			struct list_head *objs,
> -			bool intr,
> -			unsigned int num_shared)
> -{
> -	struct ttm_validate_buffer *tv_vm, *tv_bo;
> +int xe_vm_lock_dma_resv(struct xe_vm *vm, struct drm_exec *exec,
> +			bool intr, unsigned int num_shared)
> +{
>  	struct xe_vma *vma, *next;
> -	LIST_HEAD(dups);
> +	struct drm_gem_object *obj;
>  	int err;
>  
>  	lockdep_assert_held(&vm->lock);
>  
> -	if (vm->extobj.entries < XE_ONSTACK_TV) {
> -		tv_vm = tv_onstack;
> -	} else {
> -		tv_vm = kvmalloc_array(vm->extobj.entries + 1, sizeof(*tv_vm),
> -				       GFP_KERNEL);
> -		if (!tv_vm)
> -			return -ENOMEM;
> +	drm_exec_init(exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT : 0);
> +	drm_exec_until_all_locked(exec) {
> +		err = drm_exec_prepare_obj(exec, xe_vm_gem(vm), num_shared);
> +		drm_exec_retry_on_contention(exec);
> +		if (unlikely(err) && err != -EALREADY)
> +			goto out_err;
> +		list_for_each_entry(vma, &vm->extobj.list, extobj.link) {
> +			obj = &xe_vma_bo(vma)->ttm.base;
> +			err = drm_exec_prepare_obj(exec, obj, num_shared);
> +			drm_exec_retry_on_contention(exec);
> +			if (unlikely(err) && err != -EALREADY)
> +				goto out_err;
> +		}
>  	}
> -	tv_bo = tv_vm + 1;
> -
> -	INIT_LIST_HEAD(objs);
> -	list_for_each_entry(vma, &vm->extobj.list, extobj.link) {
> -		tv_bo->num_shared = num_shared;
> -		tv_bo->bo = &xe_vma_bo(vma)->ttm;
> -
> -		list_add_tail(&tv_bo->head, objs);
> -		tv_bo++;
> -	}
> -	tv_vm->num_shared = num_shared;
> -	tv_vm->bo = xe_vm_ttm_bo(vm);
> -	list_add_tail(&tv_vm->head, objs);
> -	err = ttm_eu_reserve_buffers(ww, objs, intr, &dups);
> -	if (err)
> -		goto out_err;
>  
>  	spin_lock(&vm->notifier.list_lock);
>  	list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list,
> @@ -470,14 +442,10 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
>  			list_move_tail(&vma->rebind_link, &vm->rebind_list);
>  	}
>  	spin_unlock(&vm->notifier.list_lock);
> -
> -	*tv = tv_vm;
>  	return 0;
>  
>  out_err:
> -	if (tv_vm != tv_onstack)
> -		kvfree(tv_vm);
> -
> +	drm_exec_fini(exec);
>  	return err;
>  }
>  
> @@ -485,20 +453,16 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
>   * xe_vm_unlock_dma_resv() - Unlock reservation objects locked by
>   * xe_vm_lock_dma_resv()
>   * @vm: The vm.
> - * @tv_onstack: The @tv_onstack array given to xe_vm_lock_dma_resv().
> - * @tv: The value of *@tv given by xe_vm_lock_dma_resv().
> - * @ww: The ww_acquire_context used for locking.
> - * @objs: The list returned from xe_vm_lock_dma_resv().
> + * @exec: The @drm_exec given to xe_vm_lock_dma_resv().
>   *
>   * Unlocks the reservation objects and frees any memory allocated by
>   * xe_vm_lock_dma_resv().
>   */
> -void xe_vm_unlock_dma_resv(struct xe_vm *vm,
> -			   struct ttm_validate_buffer *tv_onstack,
> -			   struct ttm_validate_buffer *tv,
> -			   struct ww_acquire_ctx *ww,
> -			   struct list_head *objs)
> +void xe_vm_unlock_dma_resv(struct xe_vm *vm, struct drm_exec *exec)
>  {
> +	struct drm_gem_object *obj, *skip = xe_vm_gem(vm);
> +	unsigned long index;
> +
>  	/*
>  	 * Nothing should've been able to enter the list while we were locked,
>  	 * since we've held the dma-resvs of all the vm's external objects,
> @@ -507,9 +471,13 @@ void xe_vm_unlock_dma_resv(struct xe_vm *vm,
>  	 */
>  	XE_WARN_ON(!list_empty(&vm->notifier.rebind_list));
>  
> -	ttm_eu_backoff_reservation(ww, objs);
> -	if (tv && tv != tv_onstack)
> -		kvfree(tv);
> +	drm_exec_for_each_locked_object(exec, index, obj) {
> +		struct xe_bo *bo = gem_to_xe_bo(obj);
> +
> +		if (obj != skip)
> +			ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
> +	}
> +	drm_exec_fini(exec);
>  }
>  
>  #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
> @@ -536,10 +504,7 @@ static void preempt_rebind_work_func(struct work_struct *w)
>  {
>  	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
>  	struct xe_vma *vma;
> -	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
> -	struct ttm_validate_buffer *tv;
> -	struct ww_acquire_ctx ww;
> -	struct list_head objs;
> +	struct drm_exec exec;
>  	struct dma_fence *rebind_fence;
>  	unsigned int fence_count = 0;
>  	LIST_HEAD(preempt_fences);
> @@ -582,8 +547,7 @@ static void preempt_rebind_work_func(struct work_struct *w)
>  			goto out_unlock_outer;
>  	}
>  
> -	err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs,
> -				  false, vm->preempt.num_engines);
> +	err = xe_vm_lock_dma_resv(vm, &exec, false, vm->preempt.num_engines);
>  	if (err)
>  		goto out_unlock_outer;
>  
> @@ -662,7 +626,7 @@ static void preempt_rebind_work_func(struct work_struct *w)
>  	up_read(&vm->userptr.notifier_lock);
>  
>  out_unlock:
> -	xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
> +	xe_vm_unlock_dma_resv(vm, &exec);
>  out_unlock_outer:
>  	if (err == -EAGAIN) {
>  		trace_xe_vm_rebind_worker_retry(vm);
> @@ -1105,27 +1069,17 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
>  
>  static void xe_vma_destroy_unlocked(struct xe_vma *vma)
>  {
> -	struct ttm_validate_buffer tv[2];
> -	struct ww_acquire_ctx ww;
> +	struct xe_vm *vm = xe_vma_vm(vma);
>  	struct xe_bo *bo = xe_vma_bo(vma);
> -	LIST_HEAD(objs);
> -	LIST_HEAD(dups);
> +	struct drm_exec exec;
>  	int err;
>  
> -	memset(tv, 0, sizeof(tv));
> -	tv[0].bo = xe_vm_ttm_bo(xe_vma_vm(vma));
> -	list_add(&tv[0].head, &objs);
> -
> -	if (bo) {
> -		tv[1].bo = &xe_bo_get(bo)->ttm;
> -		list_add(&tv[1].head, &objs);
> -	}
> -	err = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
> +	err = xe_vm_bo_lock(vm, xe_bo_get(bo), &exec, 0, false);
>  	XE_WARN_ON(err);
>  
>  	xe_vma_destroy(vma, NULL);
>  
> -	ttm_eu_backoff_reservation(&ww, &objs);
> +	xe_vm_bo_unlock(vm, bo, &exec, false);
>  	if (bo)
>  		xe_bo_put(bo);
>  }
> @@ -2125,21 +2079,6 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
>  
>  #define VM_BIND_OP(op)	(op & 0xffff)
>  
> -struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm)
> -{
> -	int idx = vm->flags & XE_VM_FLAG_MIGRATION ?
> -		XE_VM_FLAG_GT_ID(vm->flags) : 0;
> -
> -	/* Safe to use index 0 as all BO in the VM share a single dma-resv lock */
> -	return &vm->pt_root[idx]->bo->ttm;
> -}
> -
> -static void xe_vm_tv_populate(struct xe_vm *vm, struct ttm_validate_buffer *tv)
> -{
> -	tv->num_shared = 1;
> -	tv->bo = xe_vm_ttm_bo(vm);
> -}
> -
>  static void vm_set_async_error(struct xe_vm *vm, int err)
>  {
>  	lockdep_assert_held(&vm->lock);
> @@ -2577,17 +2516,12 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
>  static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
>  			       struct xe_vma_op *op)
>  {
> -	LIST_HEAD(objs);
> -	LIST_HEAD(dups);
> -	struct ttm_validate_buffer tv_bo, tv_vm;
> -	struct ww_acquire_ctx ww;
>  	struct xe_bo *vbo;
> +	struct drm_exec exec;
>  	int err;
>  
>  	lockdep_assert_held_write(&vm->lock);
>  
> -	xe_vm_tv_populate(vm, &tv_vm);
> -	list_add_tail(&tv_vm.head, &objs);
>  	vbo = xe_vma_bo(vma);
>  	if (vbo) {
>  		/*
> @@ -2596,16 +2530,10 @@ static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
>  		 * take a reference here.
>  		 */
>  		xe_bo_get(vbo);
> -
> -		if (!vbo->vm) {
> -			tv_bo.bo = &vbo->ttm;
> -			tv_bo.num_shared = 1;
> -			list_add(&tv_bo.head, &objs);
> -		}
>  	}
>  
>  again:
> -	err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups);
> +	err = xe_vm_bo_lock(vm, vbo, &exec, 1, true);
>  	if (err) {
>  		xe_bo_put(vbo);
>  		return err;
> @@ -2687,7 +2615,7 @@ static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
>  		XE_BUG_ON("NOT POSSIBLE");
>  	}
>  
> -	ttm_eu_backoff_reservation(&ww, &objs);
> +	xe_vm_bo_unlock(vm, vbo, &exec, false);
>  	if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
>  		lockdep_assert_held_write(&vm->lock);
>  		err = xe_vma_userptr_pin_pages(vma);
> @@ -3338,24 +3266,47 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  	return err == -ENODATA ? 0 : err;
>  }
>  
> +struct drm_gem_object *xe_vm_gem(struct xe_vm *vm)
> +{
> +	int idx = vm->flags & XE_VM_FLAG_MIGRATION ?
> +		XE_VM_FLAG_GT_ID(vm->flags) : 0;
> +
> +	/* Safe to use index 0 as all BO in the VM share a single dma-resv lock */
> +	return &vm->pt_root[idx]->bo->ttm.base;
> +}
> +
>  /*
> - * XXX: Using the TTM wrappers for now, likely can call into dma-resv code
> - * directly to optimize. Also this likely should be an inline function.
> + * XXX: This likely should be an inline function.
>   */
>  int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
>  	       int num_resv, bool intr)
>  {
> -	struct ttm_validate_buffer tv_vm;
> -	LIST_HEAD(objs);
> -	LIST_HEAD(dups);
> +	struct dma_resv *obj;
> +	int err;
>  
>  	XE_BUG_ON(!ww);
>  
> -	tv_vm.num_shared = num_resv;
> -	tv_vm.bo = xe_vm_ttm_bo(vm);;
> -	list_add_tail(&tv_vm.head, &objs);
> +	obj = xe_vm_gem(vm)->resv;
> +	ww_acquire_init(ww, &reservation_ww_class);
> +
> +	if (intr)
> +		err = dma_resv_lock_interruptible(obj, ww);
> +	else
> +		err = dma_resv_lock(obj, ww);
> +
> +	if (unlikely(err))
> +		return err;
> +
> +	num_resv = max(num_resv, 1);

Same question as above here.

Matt

> +	err = dma_resv_reserve_fences(obj, num_resv);
> +	if (err)
> +		goto out_err;
>  
> -	return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
> +	return 0;
> +
> +out_err:
> +	dma_resv_unlock(&vm->resv);
> +	return err;
>  }
>  
>  void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww)
> @@ -3364,6 +3315,43 @@ void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww)
>  	ww_acquire_fini(ww);
>  }
>  
> +int xe_vm_bo_lock(struct xe_vm *vm, struct xe_bo *bo, struct drm_exec *exec,
> +		  int num_resv, bool intr)
> +{
> +	int err;
> +
> +	drm_exec_init(exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT : 0);
> +	drm_exec_until_all_locked(exec) {
> +		err = drm_exec_prepare_obj(exec, xe_vm_gem(vm),
> +					   num_resv);
> +		drm_exec_retry_on_contention(exec);
> +		if (err && err != -EALREADY)
> +			goto out_err;
> +
> +		if (bo && !bo->vm) {
> +			err = drm_exec_prepare_obj(exec, &bo->ttm.base,
> +						   num_resv);
> +			drm_exec_retry_on_contention(exec);
> +			if (err && err != -EALREADY)
> +				goto out_err;
> +		}
> +	}
> +
> +	return 0;
> +
> +out_err:
> +	drm_exec_fini(exec);
> +	return err;
> +}
> +
> +void xe_vm_bo_unlock(struct xe_vm *vm, struct xe_bo *bo, struct drm_exec *exec,
> +		     bool lru_update)
> +{
> +	if (lru_update && bo && (!bo->vm || xe_vm_no_dma_fences(vm)))
> +		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
> +	drm_exec_fini(exec);
> +}
> +
>  /**
>   * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
>   * @vma: VMA to invalidate
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index eaf11ac8ff51..62a062bd5078 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -12,6 +12,7 @@
>  #include "xe_vm_types.h"
>  
>  struct drm_device;
> +struct drm_exec;
>  struct drm_printer;
>  struct drm_file;
>  
> @@ -39,11 +40,17 @@ static inline void xe_vm_put(struct xe_vm *vm)
>  	kref_put(&vm->refcount, xe_vm_free);
>  }
>  
> +struct drm_gem_object *xe_vm_gem(struct xe_vm *vm);
> +
>  int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
>  	       int num_resv, bool intr);
> -
>  void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww);
>  
> +int xe_vm_bo_lock(struct xe_vm *vm, struct xe_bo *bo, struct drm_exec *exec,
> +		  int num_resv, bool intr);
> +void xe_vm_bo_unlock(struct xe_vm *vm, struct xe_bo *bo, struct drm_exec *exec,
> +		     bool lru_update);
> +
>  static inline bool xe_vm_is_closed(struct xe_vm *vm)
>  {
>  	/* Only guaranteed not to change when vm->lock is held */
> @@ -183,8 +190,6 @@ int xe_vm_async_fence_wait_start(struct dma_fence *fence);
>  
>  extern struct ttm_device_funcs xe_ttm_funcs;
>  
> -struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm);
> -
>  /**
>   * xe_vm_reactivate_rebind() - Reactivate the rebind functionality on compute
>   * vms.
> @@ -212,23 +217,10 @@ static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
>  	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
>  }
>  
> -/*
> - * XE_ONSTACK_TV is used to size the tv_onstack array that is input
> - * to xe_vm_lock_dma_resv() and xe_vm_unlock_dma_resv().
> - */
> -#define XE_ONSTACK_TV 20
> -int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
> -			struct ttm_validate_buffer *tv_onstack,
> -			struct ttm_validate_buffer **tv,
> -			struct list_head *objs,
> -			bool intr,
> -			unsigned int num_shared);
> -
> -void xe_vm_unlock_dma_resv(struct xe_vm *vm,
> -			   struct ttm_validate_buffer *tv_onstack,
> -			   struct ttm_validate_buffer *tv,
> -			   struct ww_acquire_ctx *ww,
> -			   struct list_head *objs);
> +int xe_vm_lock_dma_resv(struct xe_vm *vm, struct drm_exec *exec,
> +			bool intr, unsigned int num_shared);
> +
> +void xe_vm_unlock_dma_resv(struct xe_vm *vm, struct drm_exec *exec);
>  
>  void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
>  			     enum dma_resv_usage usage);
> -- 
> 2.34.1
> 

  reply	other threads:[~2023-07-13 14:28 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-13 13:17 [Intel-xe] [PATCH v2 0/2] drm/xe: switch to using drm_exec Francois Dugast
2023-07-13 13:17 ` [Intel-xe] [PATCH v2 1/2] drm: execution context for GEM buffers v7 Francois Dugast
2023-07-13 13:17 ` [Intel-xe] [PATCH v2 2/2] drm/xe: switch to using drm_exec Francois Dugast
2023-07-13 14:00   ` Matthew Brost [this message]
2023-07-13 14:17     ` Francois Dugast
2023-07-13 14:35     ` Christian König
2023-07-13 18:13       ` Matthew Brost
2023-07-14  6:44         ` Christian König
2023-07-13 14:18 ` [Intel-xe] ✓ CI.Patch_applied: success for " Patchwork
2023-07-13 14:19 ` [Intel-xe] ✗ CI.checkpatch: warning " Patchwork
2023-07-13 14:20 ` [Intel-xe] ✓ CI.KUnit: success " Patchwork
2023-07-13 14:24 ` [Intel-xe] ✓ CI.Build: " Patchwork
2023-07-13 14:24 ` [Intel-xe] ✓ CI.Hooks: " Patchwork
2023-07-13 14:25 ` [Intel-xe] ✓ CI.checksparse: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZLADblgo7aS6fTNi@DUT025-TGLU \
    --to=matthew.brost@intel.com \
    --cc=christian.koenig@amd.com \
    --cc=francois.dugast@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.