Re: [Intel-xe] [PATCH v2 17/31] drm/xe: NULL binding implementation

Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
To: Matthew Brost <matthew.brost@intel.com>
Cc: intel-xe@lists.freedesktop.org
Subject: Re: [Intel-xe] [PATCH v2 17/31] drm/xe: NULL binding implementation
Date: Tue, 9 May 2023 10:34:52 -0400	[thread overview]
Message-ID: <ZFpaDFSZuDqtTbKH@intel.com> (raw)
In-Reply-To: <20230502001727.3211096-18-matthew.brost@intel.com>

On Mon, May 01, 2023 at 05:17:13PM -0700, Matthew Brost wrote:
> Add uAPI and implementation for NULL bindings. A NULL binding is defined
> as writes dropped and read zero. A single bit in the uAPI has been added
> which results in a single bit in the PTEs being set.

I have confirmed in the spec that this is the case for bit 9 of the PTE:

"If Null=1, the h/w will avoid the memory access and return all
zero's for the read access with a null completion, write accesses are dropped"

The code looks good, but just a few questions / comments below.

> 
> NULL bindings are intended to be used to implement VK sparse bindings.

Is there an example or any documentation available that explains
how this is used and why it is needed?

Is there any IGT test for this?

> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_bo.h           |  1 +
>  drivers/gpu/drm/xe/xe_exec.c         |  2 +
>  drivers/gpu/drm/xe/xe_gt_pagefault.c |  4 +-
>  drivers/gpu/drm/xe/xe_pt.c           | 77 ++++++++++++++++-------
>  drivers/gpu/drm/xe/xe_vm.c           | 92 ++++++++++++++++++----------
>  drivers/gpu/drm/xe/xe_vm.h           | 10 +++
>  drivers/gpu/drm/xe/xe_vm_madvise.c   |  2 +-
>  drivers/gpu/drm/xe/xe_vm_types.h     |  3 +
>  include/uapi/drm/xe_drm.h            |  8 +++
>  9 files changed, 144 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index 25457b3c757b..81051f456874 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -56,6 +56,7 @@
>  #define XE_PDE_IPS_64K			BIT_ULL(11)
>  
>  #define XE_GGTT_PTE_LM			BIT_ULL(1)
> +#define XE_PTE_NULL			BIT_ULL(9)
>  #define XE_USM_PPGTT_PTE_AE		BIT_ULL(10)
>  #define XE_PPGTT_PTE_LM			BIT_ULL(11)
>  #define XE_PDE_64K			BIT_ULL(6)
> diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> index 90c46d092737..68f876afd13c 100644
> --- a/drivers/gpu/drm/xe/xe_exec.c
> +++ b/drivers/gpu/drm/xe/xe_exec.c
> @@ -116,6 +116,8 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
>  	 * to a location where the GPU can access it).
>  	 */
>  	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
> +		XE_BUG_ON(xe_vma_is_null(vma));

Can we avoid a BUG here? Maybe use a WARN (e.g. XE_WARN_ON) instead?

> +
>  		if (xe_vma_is_userptr(vma))
>  			continue;
>  
> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> index f7a066090a13..cfffe3398fe4 100644
> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> @@ -526,8 +526,8 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
>  
>  	trace_xe_vma_acc(vma);
>  
> -	/* Userptr can't be migrated, nothing to do */
> -	if (xe_vma_is_userptr(vma))
> +	/* Userptr or null can't be migrated, nothing to do */
> +	if (xe_vma_has_no_bo(vma))
>  		goto unlock_vm;
>  
>  	/* Lock VM and BOs dma-resv */
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index 2b5b05a8a084..b4edb751bfbb 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -82,7 +82,9 @@ u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset,
>  static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
>  			   size_t page_size, bool *is_vram)
>  {
> -	if (xe_vma_is_userptr(vma)) {
> +	if (xe_vma_is_null(vma)) {
> +		return 0;
> +	} else if (xe_vma_is_userptr(vma)) {
>  		struct xe_res_cursor cur;
>  		u64 page;
>  
> @@ -563,6 +565,10 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
>  	if (next - xe_walk->va_curs_start > xe_walk->curs->size)
>  		return false;
>  
> +	/* null VMA's do not have dma adresses */
> +	if (xe_walk->pte_flags & XE_PTE_NULL)
> +		return true;
> +
>  	/* Is the DMA address huge PTE size aligned? */
>  	size = next - addr;
>  	dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs);
> @@ -585,6 +591,10 @@ xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
>  	if (next > xe_walk->l0_end_addr)
>  		return false;
>  
> +	/* null VMA's do not have dma adresses */
> +	if (xe_walk->pte_flags & XE_PTE_NULL)
> +		return true;
> +
>  	xe_res_next(&curs, addr - xe_walk->va_curs_start);
>  	for (; addr < next; addr += SZ_64K) {
>  		if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K)
> @@ -630,17 +640,34 @@ xe_pt_stage_bind_entry(struct drm_pt *parent, pgoff_t offset,
>  	struct xe_pt *xe_child;
>  	bool covers;
>  	int ret = 0;
> -	u64 pte;
> +	u64 pte = 0;
>  
>  	/* Is this a leaf entry ?*/
>  	if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
>  		struct xe_res_cursor *curs = xe_walk->curs;
> +		bool null = xe_walk->pte_flags & XE_PTE_NULL;
>  
>  		XE_WARN_ON(xe_walk->va_curs_start != addr);
>  
> -		pte = __gen8_pte_encode(xe_res_dma(curs) + xe_walk->dma_offset,
> -					xe_walk->cache, xe_walk->pte_flags,
> -					level);
> +		if (null) {
> +			pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
> +
> +			if (unlikely(xe_walk->pte_flags & XE_PTE_READ_ONLY))
> +				pte &= ~XE_PAGE_RW;
> +
> +			if (level == 1)
> +				pte |= XE_PDE_PS_2M;
> +			else if (level == 2)
> +				pte |= XE_PDPE_PS_1G;
> +
> +			pte |= XE_PTE_NULL;
> +		} else {
> +			pte = __gen8_pte_encode(xe_res_dma(curs) +
> +						xe_walk->dma_offset,
> +						xe_walk->cache,
> +						xe_walk->pte_flags,
> +						level);
> +		}
>  		pte |= xe_walk->default_pte;
>  
>  		/*
> @@ -658,7 +685,8 @@ xe_pt_stage_bind_entry(struct drm_pt *parent, pgoff_t offset,
>  		if (unlikely(ret))
>  			return ret;
>  
> -		xe_res_next(curs, next - addr);
> +		if (!null)
> +			xe_res_next(curs, next - addr);
>  		xe_walk->va_curs_start = next;
>  		*action = ACTION_CONTINUE;
>  
> @@ -751,7 +779,8 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
>  		.gt = gt,
>  		.curs = &curs,
>  		.va_curs_start = xe_vma_start(vma),
> -		.pte_flags = xe_vma_read_only(vma) ? XE_PTE_READ_ONLY : 0,
> +		.pte_flags = xe_vma_read_only(vma) ? XE_PTE_READ_ONLY : 0 |
> +			xe_vma_is_null(vma) ? XE_PTE_NULL : 0,
>  		.wupd.entries = entries,
>  		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAGS_64K) &&
>  			is_vram,
> @@ -769,23 +798,28 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
>  			gt_to_xe(gt)->mem.vram.io_start;
>  		xe_walk.cache = XE_CACHE_WB;
>  	} else {
> -		if (!xe_vma_is_userptr(vma) && bo->flags & XE_BO_SCANOUT_BIT)
> +		if (!xe_vma_has_no_bo(vma) && bo->flags & XE_BO_SCANOUT_BIT)
>  			xe_walk.cache = XE_CACHE_WT;
>  		else
>  			xe_walk.cache = XE_CACHE_WB;
>  	}
> -	if (!xe_vma_is_userptr(vma) && xe_bo_is_stolen(bo))
> +	if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
>  		xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
>  
>  	xe_bo_assert_held(bo);
> -	if (xe_vma_is_userptr(vma))
> -		xe_res_first_sg(vma->userptr.sg, 0, xe_vma_size(vma), &curs);
> -	else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
> -		xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
> -			     xe_vma_size(vma), &curs);
> -	else
> -		xe_res_first_sg(xe_bo_get_sg(bo), xe_vma_bo_offset(vma),
> -				xe_vma_size(vma), &curs);
> +	if (!xe_vma_is_null(vma)) {
> +		if (xe_vma_is_userptr(vma))
> +			xe_res_first_sg(vma->userptr.sg, 0, xe_vma_size(vma),
> +					&curs);
> +		else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
> +			xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
> +				     xe_vma_size(vma), &curs);
> +		else
> +			xe_res_first_sg(xe_bo_get_sg(bo), xe_vma_bo_offset(vma),
> +					xe_vma_size(vma), &curs);
> +	} else {
> +		curs.size = xe_vma_size(vma);
> +	}
>  
>  	ret = drm_pt_walk_range(&pt->drm, pt->level, xe_vma_start(vma),
>  				xe_vma_end(vma), &xe_walk.drm);
> @@ -979,7 +1013,7 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma)
>  
>  	if (xe_vma_is_userptr(vma))
>  		lockdep_assert_held_read(&vm->userptr.notifier_lock);
> -	else
> +	else if (!xe_vma_is_null(vma))
>  		dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv);
>  
>  	dma_resv_assert_held(&vm->resv);
> @@ -1283,7 +1317,8 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
>  	struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
>  	struct xe_pt_migrate_pt_update bind_pt_update = {
>  		.base = {
> -			.ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops,
> +			.ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops :
> +				&bind_ops,
>  			.vma = vma,
>  		},
>  		.bind = true,
> @@ -1348,7 +1383,7 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
>  				   DMA_RESV_USAGE_KERNEL :
>  				   DMA_RESV_USAGE_BOOKKEEP);
>  
> -		if (!xe_vma_is_userptr(vma) && !xe_vma_bo(vma)->vm)
> +		if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
>  			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
>  					   DMA_RESV_USAGE_BOOKKEEP);
>  		xe_pt_commit_bind(vma, entries, num_entries, rebind,
> @@ -1667,7 +1702,7 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
>  				   DMA_RESV_USAGE_BOOKKEEP);
>  
>  		/* This fence will be installed by caller when doing eviction */
> -		if (!xe_vma_is_userptr(vma) && !xe_vma_bo(vma)->vm)
> +		if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
>  			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
>  					   DMA_RESV_USAGE_BOOKKEEP);
>  		xe_pt_commit_unbind(vma, entries, num_entries,
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index f3608865e259..a46f44ab2546 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -60,6 +60,7 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
>  
>  	lockdep_assert_held(&vm->lock);
>  	XE_BUG_ON(!xe_vma_is_userptr(vma));
> +	XE_BUG_ON(xe_vma_is_null(vma));
>  retry:
>  	if (vma->gpuva.flags & XE_VMA_DESTROYED)
>  		return 0;
> @@ -581,7 +582,7 @@ static void preempt_rebind_work_func(struct work_struct *w)
>  		goto out_unlock;
>  
>  	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
> -		if (xe_vma_is_userptr(vma) ||
> +		if (xe_vma_has_no_bo(vma) ||
>  		    vma->gpuva.flags & XE_VMA_DESTROYED)
>  			continue;
>  
> @@ -813,7 +814,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>  				    struct xe_bo *bo,
>  				    u64 bo_offset_or_userptr,
>  				    u64 start, u64 end,
> -				    bool read_only,
> +				    bool read_only, bool null,
>  				    u64 gt_mask)
>  {
>  	struct xe_vma *vma;
> @@ -843,6 +844,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>  	vma->gpuva.va.range = end - start + 1;
>  	if (read_only)
>  		vma->gpuva.flags |= XE_VMA_READ_ONLY;
> +	if (null)
> +		vma->gpuva.flags |= XE_VMA_NULL;
>  
>  	if (gt_mask) {
>  		vma->gt_mask = gt_mask;
> @@ -862,23 +865,26 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>  		vma->gpuva.gem.obj = &bo->ttm.base;
>  		vma->gpuva.gem.offset = bo_offset_or_userptr;
>  		drm_gpuva_link(&vma->gpuva);
> -	} else /* userptr */ {
> -		u64 size = end - start + 1;
> -		int err;
> -
> -		vma->gpuva.gem.offset = bo_offset_or_userptr;
> +	} else /* userptr or null */ {
> +		if (!null) {
> +			u64 size = end - start + 1;
> +			int err;
> +
> +			vma->gpuva.gem.offset = bo_offset_or_userptr;
> +			err = mmu_interval_notifier_insert(&vma->userptr.notifier,
> +							   current->mm,
> +							   xe_vma_userptr(vma),
> +							   size,
> +							   &vma_userptr_notifier_ops);
> +			if (err) {
> +				kfree(vma);
> +				vma = ERR_PTR(err);
> +				return vma;
> +			}
>  
> -		err = mmu_interval_notifier_insert(&vma->userptr.notifier,
> -						   current->mm,
> -						   xe_vma_userptr(vma), size,
> -						   &vma_userptr_notifier_ops);
> -		if (err) {
> -			kfree(vma);
> -			vma = ERR_PTR(err);
> -			return vma;
> +			vma->userptr.notifier_seq = LONG_MAX;
>  		}
>  
> -		vma->userptr.notifier_seq = LONG_MAX;
>  		xe_vm_get(vm);
>  	}
>  
> @@ -916,6 +922,8 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
>  		 */
>  		mmu_interval_notifier_remove(&vma->userptr.notifier);
>  		xe_vm_put(vm);
> +	} else if (xe_vma_is_null(vma)) {
> +		xe_vm_put(vm);
>  	} else {
>  		xe_bo_put(xe_vma_bo(vma));
>  	}
> @@ -954,7 +962,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
>  		list_del_init(&vma->userptr.invalidate_link);
>  		spin_unlock(&vm->userptr.invalidated_lock);
>  		list_del(&vma->userptr_link);
> -	} else {
> +	} else if (!xe_vma_is_null(vma)) {
>  		xe_bo_assert_held(xe_vma_bo(vma));
>  		drm_gpuva_unlink(&vma->gpuva);
>  		if (!xe_vma_bo(vma)->vm)
> @@ -1305,7 +1313,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
>  	drm_gpuva_iter_for_each(gpuva, it) {
>  		vma = gpuva_to_vma(gpuva);
>  
> -		if (xe_vma_is_userptr(vma)) {
> +		if (xe_vma_has_no_bo(vma)) {
>  			down_read(&vm->userptr.notifier_lock);
>  			vma->gpuva.flags |= XE_VMA_DESTROYED;
>  			up_read(&vm->userptr.notifier_lock);
> @@ -1315,7 +1323,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
>  		drm_gpuva_iter_remove(&it);
>  
>  		/* easy case, remove from VMA? */
> -		if (xe_vma_is_userptr(vma) || xe_vma_bo(vma)->vm) {
> +		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
>  			xe_vma_destroy(vma, NULL);
>  			continue;
>  		}
> @@ -1964,7 +1972,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
>  
>  	XE_BUG_ON(region > ARRAY_SIZE(region_to_mem_type));
>  
> -	if (!xe_vma_is_userptr(vma)) {
> +	if (!xe_vma_has_no_bo(vma)) {
>  		err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
>  		if (err)
>  			return err;
> @@ -2170,6 +2178,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
>  				operation & XE_VM_BIND_FLAG_IMMEDIATE;
>  			op->map.read_only =
>  				operation & XE_VM_BIND_FLAG_READONLY;
> +			op->map.null = operation & XE_VM_BIND_FLAG_NULL;
>  		}
>  		break;
>  	case XE_VM_BIND_OP_UNMAP:
> @@ -2226,7 +2235,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
>  }
>  
>  static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
> -			      u64 gt_mask, bool read_only)
> +			      u64 gt_mask, bool read_only, bool null)
>  {
>  	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
>  	struct xe_vma *vma;
> @@ -2242,7 +2251,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
>  	}
>  	vma = xe_vma_create(vm, bo, op->gem.offset,
>  			    op->va.addr, op->va.addr +
> -			    op->va.range - 1, read_only,
> +			    op->va.range - 1, read_only, null,
>  			    gt_mask);
>  	if (bo)
>  		xe_bo_unlock(bo, &ww);
> @@ -2254,7 +2263,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
>  			xe_vma_destroy(vma, NULL);
>  			return ERR_PTR(err);
>  		}
> -	} else if(!bo->vm) {
> +	} else if(!xe_vma_has_no_bo(vma) && !bo->vm) {
>  		vm_insert_extobj(vm, vma);
>  		err = add_preempt_fences(vm, bo);
>  		if (err) {
> @@ -2332,7 +2341,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
>  				struct xe_vma *vma;
>  
>  				vma = new_vma(vm, &op->base.map,
> -					      op->gt_mask, op->map.read_only);
> +					      op->gt_mask, op->map.read_only,
> +					      op->map.null );
>  				if (IS_ERR(vma)) {
>  					err = PTR_ERR(vma);
>  					goto free_fence;
> @@ -2347,9 +2357,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
>  					bool read_only =
>  						op->base.remap.unmap->va->flags &
>  						XE_VMA_READ_ONLY;
> +					bool null =
> +						op->base.remap.unmap->va->flags &
> +						XE_VMA_NULL;
>  
>  					vma = new_vma(vm, op->base.remap.prev,
> -						      op->gt_mask, read_only);
> +						      op->gt_mask, read_only,
> +						      null);
>  					if (IS_ERR(vma)) {
>  						err = PTR_ERR(vma);
>  						goto free_fence;
> @@ -2364,8 +2378,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
>  						op->base.remap.unmap->va->flags &
>  						XE_VMA_READ_ONLY;
>  
> +					bool null =
> +						op->base.remap.unmap->va->flags &
> +						XE_VMA_NULL;
> +
>  					vma = new_vma(vm, op->base.remap.next,
> -						      op->gt_mask, read_only);
> +						      op->gt_mask, read_only,
> +						      null);
>  					if (IS_ERR(vma)) {
>  						err = PTR_ERR(vma);
>  						goto free_fence;
> @@ -2853,11 +2872,12 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
>  #ifdef TEST_VM_ASYNC_OPS_ERROR
>  #define SUPPORTED_FLAGS	\
>  	(FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \
> -	 XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | 0xffff)
> +	 XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | \
> +	 XE_VM_BIND_FLAG_NULL | 0xffff)
>  #else
>  #define SUPPORTED_FLAGS	\
>  	(XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \
> -	 XE_VM_BIND_FLAG_IMMEDIATE | 0xffff)
> +	 XE_VM_BIND_FLAG_IMMEDIATE | XE_VM_BIND_FLAG_NULL | 0xffff)
>  #endif
>  #define XE_64K_PAGE_MASK 0xffffull
>  
> @@ -2903,6 +2923,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
>  		u32 obj = (*bind_ops)[i].obj;
>  		u64 obj_offset = (*bind_ops)[i].obj_offset;
>  		u32 region = (*bind_ops)[i].region;
> +		bool null = op &  XE_VM_BIND_FLAG_NULL;
>  
>  		if (i == 0) {
>  			*async = !!(op & XE_VM_BIND_FLAG_ASYNC);
> @@ -2929,8 +2950,12 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
>  		if (XE_IOCTL_ERR(xe, VM_BIND_OP(op) >
>  				 XE_VM_BIND_OP_PREFETCH) ||
>  		    XE_IOCTL_ERR(xe, op & ~SUPPORTED_FLAGS) ||
> +		    XE_IOCTL_ERR(xe, obj && null) ||
> +		    XE_IOCTL_ERR(xe, obj_offset && null) ||
> +		    XE_IOCTL_ERR(xe, VM_BIND_OP(op) != XE_VM_BIND_OP_MAP &&
> +				 null) ||
>  		    XE_IOCTL_ERR(xe, !obj &&
> -				 VM_BIND_OP(op) == XE_VM_BIND_OP_MAP) ||
> +				 VM_BIND_OP(op) == XE_VM_BIND_OP_MAP && !null) ||
>  		    XE_IOCTL_ERR(xe, !obj &&
>  				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
>  		    XE_IOCTL_ERR(xe, addr &&
> @@ -3254,6 +3279,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
>  	int ret;
>  
>  	XE_BUG_ON(!xe_vm_in_fault_mode(xe_vma_vm(vma)));
> +	XE_BUG_ON(xe_vma_is_null(vma));
>  	trace_xe_vma_usm_invalidate(vma);
>  
>  	/* Check that we don't race with page-table updates */
> @@ -3313,8 +3339,11 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
>  	drm_gpuva_iter_for_each(gpuva, it) {
>  		struct xe_vma* vma = gpuva_to_vma(gpuva);
>  		bool is_userptr = xe_vma_is_userptr(vma);
> +		bool null = xe_vma_is_null(vma);
>  
> -		if (is_userptr) {
> +		if (null) {
> +			addr = 0;
> +		} else if (is_userptr) {
>  			struct xe_res_cursor cur;
>  
>  			xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE, &cur);
> @@ -3324,7 +3353,8 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
>  		}
>  		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
>  			   xe_vma_start(vma), xe_vma_end(vma), xe_vma_size(vma),
> -			   addr, is_userptr ? "USR" : is_vram ? "VRAM" : "SYS");
> +			   addr, null ? "NULL" :
> +			   is_userptr ? "USR" : is_vram ? "VRAM" : "SYS");
>  	}
>  	up_read(&vm->lock);
>  
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index 21b1054949c4..96e2c6b07bf8 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -175,7 +175,17 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm)
>  	}
>  }
>  
> +static inline bool xe_vma_is_null(struct xe_vma *vma)
> +{
> +	return vma->gpuva.flags & XE_VMA_NULL;
> +}
> +
>  static inline bool xe_vma_is_userptr(struct xe_vma *vma)
> +{
> +	return !xe_vma_bo(vma) && !xe_vma_is_null(vma);
> +}
> +
> +static inline bool xe_vma_has_no_bo(struct xe_vma *vma)
>  {
>  	return !xe_vma_bo(vma);
>  }
> diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
> index 02d27a354b36..03508645fa08 100644
> --- a/drivers/gpu/drm/xe/xe_vm_madvise.c
> +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
> @@ -227,7 +227,7 @@ get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range)
>  	drm_gpuva_iter_for_each_range(gpuva, it, addr + range) {
>  		struct xe_vma *vma = gpuva_to_vma(gpuva);
>  
> -		if (xe_vma_is_userptr(vma))
> +		if (xe_vma_has_no_bo(vma))
>  			continue;
>  
>  		if (*num_vmas == max_vmas) {
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index 243dc91a61b0..b61007b70502 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -29,6 +29,7 @@ struct xe_vm;
>  #define XE_VMA_ATOMIC_PTE_BIT	(DRM_GPUVA_USERBITS << 2)
>  #define XE_VMA_FIRST_REBIND	(DRM_GPUVA_USERBITS << 3)
>  #define XE_VMA_LAST_REBIND	(DRM_GPUVA_USERBITS << 4)
> +#define XE_VMA_NULL		(DRM_GPUVA_USERBITS << 5)
>  
>  struct xe_vma {
>  	/** @gpuva: Base GPUVA object */
> @@ -315,6 +316,8 @@ struct xe_vma_op_map {
>  	bool immediate;
>  	/** @read_only: Read only */
>  	bool read_only;
> +	/** @null: NULL (writes dropped, read zero) */
> +	bool null;
>  };
>  
>  /** struct xe_vma_op_unmap - VMA unmap operation */
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index b0b80aae3ee8..27c51946fadd 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -447,6 +447,14 @@ struct drm_xe_vm_bind_op {
>  	 * than differing the MAP to the page fault handler.
>  	 */
>  #define XE_VM_BIND_FLAG_IMMEDIATE	(0x1 << 18)
> +	/*
> +	 * When the NULL flag is set, the page tables are setup with a special
> +	 * bit which indicates writes are dropped and all reads return zero. The
> +	 * NULL flags is only valid for XE_VM_BIND_OP_MAP operations, the BO
> +	 * handle MBZ, and the BO offset MBZ. This flag is intended to implement
> +	 * VK sparse bindings.
> +	 */
> +#define XE_VM_BIND_FLAG_NULL		(0x1 << 19)
>  
>  	/** @reserved: Reserved */
>  	__u64 reserved[2];
> -- 
> 2.34.1
>

next prev parent reply	other threads:[~2023-05-09 14:35 UTC|newest]

Thread overview: 126+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-02  0:16 [Intel-xe] [PATCH v2 00/31] Upstreaming prep / all of mbrosts patches Matthew Brost
2023-05-02  0:16 ` [Intel-xe] [PATCH v2 01/31] drm/sched: Add run_wq argument to drm_sched_init Matthew Brost
2023-05-03 12:03   ` Thomas Hellström
2023-05-03 15:06     ` Matthew Brost
2023-05-05 18:24       ` Rodrigo Vivi
2023-05-02  0:16 ` [Intel-xe] [PATCH v2 02/31] drm/sched: Move schedule policy to scheduler Matthew Brost
2023-05-03 12:13   ` Thomas Hellström
2023-05-03 15:11     ` Matthew Brost
2023-05-02  0:16 ` [Intel-xe] [PATCH v2 03/31] drm/sched: Add DRM_SCHED_POLICY_SINGLE_ENTITY scheduling policy Matthew Brost
2023-05-08 12:40   ` Thomas Hellström
2023-05-22  1:16     ` Matthew Brost
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 04/31] drm/xe: Use DRM_SCHED_POLICY_SINGLE_ENTITY mode Matthew Brost
2023-05-08 12:41   ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 05/31] drm/xe: Long running job update Matthew Brost
2023-05-05 18:36   ` Rodrigo Vivi
2023-05-08  1:14     ` Matthew Brost
2023-05-08 13:14   ` Thomas Hellström
2023-05-09 14:56     ` Matthew Brost
2023-05-09 15:21       ` Thomas Hellström
2023-05-09 22:16         ` Matthew Brost
2023-05-10  8:15           ` Thomas Hellström
2023-05-09 22:21     ` Matthew Brost
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 06/31] drm/xe: Ensure LR engines are not persistent Matthew Brost
2023-05-05 18:38   ` Rodrigo Vivi
2023-05-08  1:03     ` Matthew Brost
2023-05-09 12:21   ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 07/31] drm/xe: Only try to lock external BOs in VM bind Matthew Brost
2023-05-05 18:40   ` Rodrigo Vivi
2023-05-08  1:08     ` Matthew Brost
2023-05-08  1:15       ` Christopher Snowhill
2023-05-08 21:34       ` Rodrigo Vivi
2023-05-09 12:29         ` Thomas Hellström
2023-05-10 23:25           ` Matthew Brost
2023-05-11  7:43             ` Thomas Hellström
2023-05-08  1:17   ` Christopher Snowhill
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 08/31] drm/xe: VM LRU bulk move Matthew Brost
2023-05-08 21:39   ` Rodrigo Vivi
2023-05-09 22:09     ` Matthew Brost
2023-05-10  1:37       ` Rodrigo Vivi
2023-05-09 12:47   ` Thomas Hellström
2023-05-09 22:05     ` Matthew Brost
2023-05-10  8:14       ` Thomas Hellström
2023-05-10 18:40         ` Matthew Brost
2023-05-11  7:24           ` Thomas Hellström
2023-05-11 14:11             ` Matthew Brost
2023-05-12  9:03               ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 09/31] drm/xe/guc: Read HXG fields from DW1 of G2H response Matthew Brost
2023-05-05 18:50   ` Rodrigo Vivi
2023-05-09 12:49   ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 10/31] drm/xe/guc: Return the lower part of blocking H2G message Matthew Brost
2023-05-05 18:52   ` Rodrigo Vivi
2023-05-08  1:10     ` Matthew Brost
2023-05-08  9:20       ` Michal Wajdeczko
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 11/31] drm/xe/guc: Use doorbells for submission if possible Matthew Brost
2023-05-08 21:42   ` Rodrigo Vivi
2023-05-10  0:49     ` Matthew Brost
2023-05-09 13:00   ` Thomas Hellström
2023-05-10  0:51     ` Matthew Brost
2023-05-21 12:32   ` Oded Gabbay
2023-06-08 19:30     ` Matthew Brost
2023-06-12 13:01       ` Oded Gabbay
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 12/31] drm/xe/guc: Print doorbell ID in GuC engine debugfs entry Matthew Brost
2023-05-05 18:55   ` Rodrigo Vivi
2023-05-09 13:01     ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 13/31] maple_tree: split up MA_STATE() macro Matthew Brost
2023-05-09 13:21   ` Thomas Hellström
2023-05-10  0:29     ` Matthew Brost
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 14/31] maple_tree: Export mas_preallocate Matthew Brost
2023-05-09 13:33   ` Thomas Hellström
2023-05-10  0:31     ` Matthew Brost
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 15/31] drm: manager to keep track of GPUs VA mappings Matthew Brost
2023-05-09 13:49   ` Thomas Hellström
2023-05-10  0:55     ` Matthew Brost
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 16/31] drm/xe: Port Xe to GPUVA Matthew Brost
2023-05-09 13:52   ` Thomas Hellström
2023-05-11  2:41     ` Matthew Brost
2023-05-11  7:39       ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 17/31] drm/xe: NULL binding implementation Matthew Brost
2023-05-09 14:34   ` Rodrigo Vivi [this message]
2023-05-11  2:52     ` Matthew Brost
2023-05-09 15:17   ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 18/31] drm/xe: Avoid doing rebinds Matthew Brost
2023-05-09 14:48   ` Rodrigo Vivi
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 19/31] drm/xe: Reduce the number list links in xe_vma Matthew Brost
2023-05-08 21:43   ` Rodrigo Vivi
2023-05-11  8:38   ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 20/31] drm/xe: Optimize size of xe_vma allocation Matthew Brost
2023-05-05 19:37   ` Rodrigo Vivi
2023-05-08  1:21     ` Matthew Brost
2023-05-11  9:05   ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 21/31] drm/gpuva: Add drm device to GPUVA manager Matthew Brost
2023-05-05 19:39   ` Rodrigo Vivi
2023-05-11  9:06     ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 22/31] drm/gpuva: Move dma-resv " Matthew Brost
2023-05-11  9:10   ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 23/31] drm/gpuva: Add support for extobj Matthew Brost
2023-05-11  9:35   ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 24/31] drm/xe: Userptr refactor Matthew Brost
2023-05-05 19:41   ` Rodrigo Vivi
2023-05-11  9:46   ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 25/31] drm: execution context for GEM buffers v3 Matthew Brost
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 26/31] drm/exec: Always compile drm_exec Matthew Brost
2023-05-09 14:45   ` Rodrigo Vivi
2023-05-10  0:37     ` Matthew Brost
2023-05-10  0:38     ` Matthew Brost
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 27/31] drm/xe: Use drm_exec for locking rather than TTM exec helpers Matthew Brost
2023-05-05 19:42   ` Rodrigo Vivi
2023-05-11 10:01   ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 28/31] drm/xe: Allow dma-fences as in-syncs for compute / faulting VM Matthew Brost
2023-05-05 19:43   ` Rodrigo Vivi
2023-05-08  1:19     ` Matthew Brost
2023-05-08 21:29       ` Rodrigo Vivi
2023-05-11 10:03   ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 29/31] drm/xe: Allow compute VMs to output dma-fences on binds Matthew Brost
2023-05-09 14:50   ` Rodrigo Vivi
2023-05-11 10:04   ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 30/31] drm/xe: remove async worker, sync binds, new error handling Matthew Brost
2023-05-17 16:53   ` Thomas Hellström
2023-05-02  0:17 ` [Intel-xe] [PATCH v2 31/31] drm/xe/uapi: Add some VM bind kernel doc Matthew Brost
2023-05-05 19:45   ` Rodrigo Vivi
2023-05-11 10:14     ` Thomas Hellström
2023-05-02  0:20 ` [Intel-xe] ✗ CI.Patch_applied: failure for Upstreaming prep / all of mbrosts patches (rev2) Patchwork
2023-05-02  1:54   ` Christopher Snowhill (kode54)
2023-05-02  1:59   ` Christopher Snowhill (kode54)
2023-05-03 12:37 ` [Intel-xe] [PATCH v2 00/31] Upstreaming prep / all of mbrosts patches Thomas Hellström
2023-05-03 15:27   ` Matthew Brost

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZFpaDFSZuDqtTbKH@intel.com \
    --to=rodrigo.vivi@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox