Re: [PATCH v2] drm/i915/gtt: Serialise both updates to PDE and our shadow

From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: Matthew Auld <matthew.auld@intel.com>
Subject: Re: [PATCH v2] drm/i915/gtt: Serialise both updates to PDE and our shadow
Date: Mon, 17 Jun 2019 16:55:36 +0300	[thread overview]
Message-ID: <8736k88gfb.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20190617112036.9373-1-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Currently, we perform a locked update of the shadow entry when
> allocating a page directory entry such that if two clients are
> concurrently allocating neighbouring ranges we only insert one new entry
> for the pair of them. However, we also need to serialise both clients
> wrt to the actual entry in the HW table, or else we may allow one client
> or even a third client to proceed ahead of the HW write. My handwave
> before was that under the _pathological_ condition we would see the
> scratch entry instead of the expected entry, causing a temporary
> glitch. That starvation condition will eventually show up in practice, so
> fix it.
>
> The reason for the previous cheat was to avoid having to free the extra
> allocation while under the spinlock. Now, we keep the extra entry
> allocated until the end instead.
>
> v2: Fix error paths for gen6
>
> Fixes: 1d1b5490b91c ("drm/i915/gtt: Replace struct_mutex serialisation for allocation")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.auld@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 130 +++++++++++++++-------------
>  1 file changed, 72 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 0392a4c4bb9b..0987748d327b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1387,82 +1387,88 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  			       struct i915_page_directory *pd,
>  			       u64 start, u64 length)
>  {
> +	struct i915_page_table *alloc = NULL;
>  	struct i915_page_table *pt;
>  	u64 from = start;
>  	unsigned int pde;
> +	int ret = 0;
>  
>  	spin_lock(&pd->lock);
>  	gen8_for_each_pde(pt, pd, start, length, pde) {
>  		const int count = gen8_pte_count(start, length);
>  
>  		if (pt == vm->scratch_pt) {
> -			struct i915_page_table *old;
> -
>  			spin_unlock(&pd->lock);
>  
> -			pt = alloc_pt(vm);
> -			if (IS_ERR(pt))
> +			pt = fetch_and_zero(&alloc);
> +			if (!pt)
> +				pt = alloc_pt(vm);
> +			if (IS_ERR(pt)) {
> +				ret = PTR_ERR(pt);
>  				goto unwind;
> +			}
>  
>  			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
>  				gen8_initialize_pt(vm, pt);
>  
> -			old = cmpxchg(&pd->page_table[pde], vm->scratch_pt, pt);
> -			if (old == vm->scratch_pt) {
> +			spin_lock(&pd->lock);
> +			if (pd->page_table[pde] == vm->scratch_pt) {
>  				gen8_ppgtt_set_pde(vm, pd, pt, pde);
> +				pd->page_table[pde] = pt;
>  				atomic_inc(&pd->used_pdes);
>  			} else {
> -				free_pt(vm, pt);
> -				pt = old;
> +				alloc = pt;
> +				pt = pd->page_table[pde];
>  			}
> -
> -			spin_lock(&pd->lock);
>  		}
>  
>  		atomic_add(count, &pt->used_ptes);
>  	}
>  	spin_unlock(&pd->lock);
> -
> -	return 0;
> +	goto out;
>  
>  unwind:
>  	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
> -	return -ENOMEM;
> +out:
> +	if (alloc)
> +		free_pt(vm, alloc);
> +	return ret;
>  }
>  
>  static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  				struct i915_page_directory_pointer *pdp,
>  				u64 start, u64 length)
>  {
> +	struct i915_page_directory *alloc = NULL;
>  	struct i915_page_directory *pd;
>  	u64 from = start;
>  	unsigned int pdpe;
> -	int ret;
> +	int ret = 0;
>  
>  	spin_lock(&pdp->lock);
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
>  		if (pd == vm->scratch_pd) {
> -			struct i915_page_directory *old;
> -
>  			spin_unlock(&pdp->lock);
>  
> -			pd = alloc_pd(vm);
> -			if (IS_ERR(pd))
> +			pd = fetch_and_zero(&alloc);
> +			if (!pd)
> +				pd = alloc_pd(vm);
> +			if (IS_ERR(pd)) {
> +				ret = PTR_ERR(pd);
>  				goto unwind;
> +			}
>  
>  			gen8_initialize_pd(vm, pd);
>  
> -			old = cmpxchg(&pdp->page_directory[pdpe],
> -				      vm->scratch_pd, pd);
> -			if (old == vm->scratch_pd) {
> +			spin_lock(&pdp->lock);
> +			if (pdp->page_directory[pdpe] == vm->scratch_pd) {
>  				gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
> +				pdp->page_directory[pdpe] = pd;
>  				atomic_inc(&pdp->used_pdpes);
>  			} else {
> -				free_pd(vm, pd);
> -				pd = old;
> +				alloc = pd;
> +				pd = pdp->page_directory[pdpe];
>  			}
> -
> -			spin_lock(&pdp->lock);
>  		}
>  		atomic_inc(&pd->used_pdes);
>  		spin_unlock(&pdp->lock);
> @@ -1475,8 +1481,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  		atomic_dec(&pd->used_pdes);
>  	}
>  	spin_unlock(&pdp->lock);
> -
> -	return 0;
> +	goto out;
>  
>  unwind_pd:
>  	spin_lock(&pdp->lock);
> @@ -1489,7 +1494,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  	spin_unlock(&pdp->lock);
>  unwind:
>  	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
> -	return -ENOMEM;
> +out:
> +	if (alloc)
> +		free_pd(vm, alloc);
> +	return ret;
>  }
>  
>  static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
> @@ -1504,33 +1512,35 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  {
>  	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	struct i915_pml4 *pml4 = &ppgtt->pml4;
> +	struct i915_page_directory_pointer *alloc = NULL;
>  	struct i915_page_directory_pointer *pdp;
>  	u64 from = start;
> +	int ret = 0;
>  	u32 pml4e;
> -	int ret;
>  
>  	spin_lock(&pml4->lock);
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
>  		if (pdp == vm->scratch_pdp) {
> -			struct i915_page_directory_pointer *old;
> -
>  			spin_unlock(&pml4->lock);
>  
> -			pdp = alloc_pdp(vm);
> -			if (IS_ERR(pdp))
> +			pdp = fetch_and_zero(&alloc);
> +			if (!pdp)
> +				pdp = alloc_pdp(vm);
> +			if (IS_ERR(pdp)) {
> +				ret = PTR_ERR(pdp);
>  				goto unwind;
> +			}
>  
>  			gen8_initialize_pdp(vm, pdp);
>  
> -			old = cmpxchg(&pml4->pdps[pml4e], vm->scratch_pdp, pdp);
> -			if (old == vm->scratch_pdp) {
> +			spin_lock(&pml4->lock);
> +			if (pml4->pdps[pml4e] == vm->scratch_pdp) {
>  				gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
> +				pml4->pdps[pml4e] = pdp;
>  			} else {
> -				free_pdp(vm, pdp);
> -				pdp = old;
> +				alloc = pdp;
> +				pdp = pml4->pdps[pml4e];
>  			}
> -
> -			spin_lock(&pml4->lock);
>  		}
>  		atomic_inc(&pdp->used_pdpes);
>  		spin_unlock(&pml4->lock);
> @@ -1543,8 +1553,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  		atomic_dec(&pdp->used_pdpes);
>  	}
>  	spin_unlock(&pml4->lock);
> -
> -	return 0;
> +	goto out;
>  
>  unwind_pdp:
>  	spin_lock(&pml4->lock);
> @@ -1555,7 +1564,10 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  	spin_unlock(&pml4->lock);
>  unwind:
>  	gen8_ppgtt_clear_4lvl(vm, from, start - from);
> -	return -ENOMEM;
> +out:
> +	if (alloc)
> +		free_pdp(vm, alloc);
> +	return ret;
>  }
>  
>  static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
> @@ -1820,11 +1832,13 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  			       u64 start, u64 length)
>  {
>  	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
> +	struct i915_page_table *alloc = NULL;
>  	struct i915_page_table *pt;
>  	intel_wakeref_t wakeref;
>  	u64 from = start;
>  	unsigned int pde;
>  	bool flush = false;
> +	int ret = 0;
>  
>  	wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
>  
> @@ -1833,19 +1847,20 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  		const unsigned int count = gen6_pte_count(start, length);
>  
>  		if (pt == vm->scratch_pt) {
> -			struct i915_page_table *old;
> -
>  			spin_unlock(&ppgtt->base.pd.lock);
>  
> -			pt = alloc_pt(vm);
> -			if (IS_ERR(pt))
> +			pt = fetch_and_zero(&alloc);
> +			if (!pt)
> +				pt = alloc_pt(vm);
> +			if (IS_ERR(pt)) {
> +				ret = PTR_ERR(pt);
>  				goto unwind_out;
> +			}
>  
>  			gen6_initialize_pt(vm, pt);
>  
> -			old = cmpxchg(&ppgtt->base.pd.page_table[pde],
> -				      vm->scratch_pt, pt);
> -			if (old == vm->scratch_pt) {
> +			spin_lock(&ppgtt->base.pd.lock);
> +			if (ppgtt->base.pd.page_table[pde] == vm->scratch_pt) {
>  				ppgtt->base.pd.page_table[pde] = pt;
>  				if (i915_vma_is_bound(ppgtt->vma,
>  						      I915_VMA_GLOBAL_BIND)) {
> @@ -1853,11 +1868,9 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  					flush = true;
>  				}
>  			} else {
> -				free_pt(vm, pt);
> -				pt = old;
> +				alloc = pt;
> +				pt = ppgtt->base.pd.page_table[pde];
>  			}
> -
> -			spin_lock(&ppgtt->base.pd.lock);
>  		}
>  
>  		atomic_add(count, &pt->used_ptes);
> @@ -1869,14 +1882,15 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  		gen6_ggtt_invalidate(vm->i915);
>  	}
>  
> -	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
> -
> -	return 0;
> +	goto out;
>  
>  unwind_out:
> -	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
>  	gen6_ppgtt_clear_range(vm, from, start - from);
> -	return -ENOMEM;
> +out:
> +	if (alloc)
> +		free_pt(vm, alloc);
> +	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
> +	return ret;
>  }
>  
>  static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx