All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: matthew.auld@intel.com
Subject: Re: [PATCH v2 03/22] drm/i915: Micro-optimise gen8_ppgtt_insert_entries()
Date: Mon, 13 Feb 2017 16:58:06 +0200	[thread overview]
Message-ID: <87a89qp281.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20170210193845.14281-3-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Improve the sg iteration and in hte process eliminate a bug in
> miscomputing the pml4 length as orig_nents<<PAGE_SHIFT is no longer the
> full length of the sg table.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 157 +++++++++++++++++++-----------------
>  1 file changed, 82 insertions(+), 75 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index ca1f5fa6984f..fcb8d635aec0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -751,9 +751,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
>  	unsigned int num_entries = gen8_pte_count(start, length);
>  	unsigned int pte = gen8_pte_index(start);
>  	unsigned int pte_end = pte + num_entries;
> -	gen8_pte_t *pt_vaddr;
> -	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
> -						 I915_CACHE_LLC);
> +	gen8_pte_t scratch_pte =
> +		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);

Please const the scratch_pte and pte_end while you are here.

> +	gen8_pte_t *vaddr;
>  
>  	if (WARN_ON(!px_page(pt)))
>  		return false;
> @@ -766,12 +766,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
>  			return true;
>  	}
>  
> -	pt_vaddr = kmap_px(pt);
> -
> +	vaddr = kmap_px(pt);
>  	while (pte < pte_end)
> -		pt_vaddr[pte++] = scratch_pte;
> -
> -	kunmap_px(ppgtt, pt_vaddr);
> +		vaddr[pte++] = scratch_pte;
> +	kunmap_px(ppgtt, vaddr);
>  
>  	return false;
>  }
> @@ -879,71 +877,93 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
>  		gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length);
>  }
>  
> -static void
> -gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
> +struct sgt_dma {
> +	struct scatterlist *sg;
> +	dma_addr_t dma, max;
> +};
> +
> +static __always_inline bool
> +gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
>  			      struct i915_page_directory_pointer *pdp,
> -			      struct sg_page_iter *sg_iter,
> -			      uint64_t start,
> +			      struct sgt_dma *iter,
> +			      u64 start,
>  			      enum i915_cache_level cache_level)
>  {
> -	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> -	gen8_pte_t *pt_vaddr;
> -	unsigned pdpe = gen8_pdpe_index(start);
> -	unsigned pde = gen8_pde_index(start);
> -	unsigned pte = gen8_pte_index(start);
> +	unsigned int pdpe = gen8_pdpe_index(start);
> +	unsigned int pde = gen8_pde_index(start);
> +	unsigned int pte = gen8_pte_index(start);
> +	struct i915_page_directory *pd;
> +	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
> +	gen8_pte_t *vaddr;
> +	bool ret = true;
>  
> -	pt_vaddr = NULL;
> +	pd = pdp->page_directory[pdpe];
> +	vaddr = kmap_px(pd->page_table[pde]);
> +	do {
> +		vaddr[pte] = pte_encode | iter->dma;
> +		iter->dma += PAGE_SIZE;
> +		if (iter->dma >= iter->max) {
> +			iter->sg = __sg_next(iter->sg);
> +			if (!iter->sg) {
> +				ret = false;
> +				break;

We never exit with ret = true in this loop.

> +			}
>  
> -	while (__sg_page_iter_next(sg_iter)) {
> -		if (pt_vaddr == NULL) {
> -			struct i915_page_directory *pd = pdp->page_directory[pdpe];
> -			struct i915_page_table *pt = pd->page_table[pde];
> -			pt_vaddr = kmap_px(pt);
> +			iter->dma = sg_dma_address(iter->sg);
> +			iter->max = iter->dma + iter->sg->length;
>  		}
>  
> -		pt_vaddr[pte] =
> -			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
> -					cache_level);
>  		if (++pte == GEN8_PTES) {
> -			kunmap_px(ppgtt, pt_vaddr);
> -			pt_vaddr = NULL;
>  			if (++pde == I915_PDES) {
> -				if (++pdpe == I915_PDPES_PER_PDP(vm->i915))
> -					break;
> +				pd = pdp->page_directory[++pdpe];
>  				pde = 0;
>  			}
> +
> +			kunmap_px(ppgtt, vaddr);
> +			vaddr = kmap_px(pd->page_table[pde]);
>  			pte = 0;
>  		}
> -	}
> +	} while (1);
> +	kunmap_px(ppgtt, vaddr);
>  
> -	if (pt_vaddr)
> -		kunmap_px(ppgtt, pt_vaddr);
> +	return ret;
>  }
>  
> -static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
> -				      struct sg_table *pages,
> -				      uint64_t start,
> -				      enum i915_cache_level cache_level,
> -				      u32 unused)
> +static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
> +				   struct sg_table *pages,
> +				   uint64_t start,
> +				   enum i915_cache_level cache_level,
> +				   u32 unused)
>  {
>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> -	struct sg_page_iter sg_iter;
> +	struct sgt_dma iter = {
> +		.sg = pages->sgl,
> +		.dma = sg_dma_address(iter.sg),
> +		.max = iter.dma + iter.sg->length,
> +	};
>  
> -	__sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
> +	gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter,
> +				      start, cache_level);
> +}
>  
> -	if (!USES_FULL_48BIT_PPGTT(vm->i915)) {
> -		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
> -					      cache_level);
> -	} else {
> -		struct i915_page_directory_pointer *pdp;
> -		uint64_t pml4e;
> -		uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
> +static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
> +				   struct sg_table *pages,
> +				   uint64_t start,
> +				   enum i915_cache_level cache_level,
> +				   u32 unused)
> +{
> +	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> +	struct sgt_dma iter = {
> +		.sg = pages->sgl,
> +		.dma = sg_dma_address(iter.sg),
> +		.max = iter.dma + iter.sg->length,
> +	};
> +	struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
> +	unsigned int pml4e = gen8_pml4e_index(start);
>  
> -		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
> -			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
> -						      start, cache_level);
> -		}
> -	}
> +	while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[pml4e++], &iter,
> +					     start, cache_level))
> +		;
>  }
>  
>  static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
> @@ -1591,7 +1611,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  	ppgtt->base.start = 0;
>  	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
>  	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
> -	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
>  	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
>  	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
>  	ppgtt->base.bind_vma = ppgtt_bind_vma;
> @@ -1606,6 +1625,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  
>  		ppgtt->base.total = 1ULL << 48;
>  		ppgtt->switch_mm = gen8_48b_mm_switch;
> +
> +		ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
>  	} else {
>  		ret = __pdp_init(dev_priv, &ppgtt->pdp);
>  		if (ret)
> @@ -1622,6 +1643,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  			if (ret)
>  				goto free_scratch;
>  		}
> +
> +		ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
>  	}
>  
>  	if (intel_vgpu_active(dev_priv))
> @@ -1888,11 +1911,6 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  	}
>  }
>  
> -struct sgt_dma {
> -	struct scatterlist *sg;
> -	dma_addr_t dma, max;
> -};
> -
>  static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>  				      struct sg_table *pages,
>  				      uint64_t start,
> @@ -2434,26 +2452,15 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>  	struct sgt_iter sgt_iter;
>  	gen8_pte_t __iomem *gtt_entries;
> -	gen8_pte_t gtt_entry;
> +	gen8_pte_t pte_encode = gen8_pte_encode(0, level);

Const would do here also for consistency.

-Mika

>  	dma_addr_t addr;
> -	int i = 0;
> -
> -	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
>  
> -	for_each_sgt_dma(addr, sgt_iter, st) {
> -		gtt_entry = gen8_pte_encode(addr, level);
> -		gen8_set_pte(&gtt_entries[i++], gtt_entry);
> -	}
> +	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
> +	gtt_entries += start >> PAGE_SHIFT;
> +	for_each_sgt_dma(addr, sgt_iter, st)
> +		gen8_set_pte(gtt_entries++, pte_encode | addr);
>  
> -	/*
> -	 * XXX: This serves as a posting read to make sure that the PTE has
> -	 * actually been updated. There is some concern that even though
> -	 * registers and PTEs are within the same BAR that they are potentially
> -	 * of NUMA access patterns. Therefore, even with the way we assume
> -	 * hardware should work, we must keep this posting read for paranoia.
> -	 */
> -	if (i != 0)
> -		WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry);
> +	wmb();
>  
>  	/* This next bit makes the above posting read even more important. We
>  	 * want to flush the TLBs only after we're certain all the PTE updates
> -- 
> 2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2017-02-13 15:00 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
2017-02-10 19:38 ` [PATCH v2 02/22] drm/i915: Micro-optimise gen6_ppgtt_insert_entries() Chris Wilson
2017-02-10 19:38 ` [PATCH v2 03/22] drm/i915: Micro-optimise gen8_ppgtt_insert_entries() Chris Wilson
2017-02-13 14:58   ` Mika Kuoppala [this message]
2017-02-13 15:12     ` Chris Wilson
2017-02-13 15:44   ` [PATCH v3] " Chris Wilson
2017-02-10 19:38 ` [PATCH v2 04/22] drm/i915: Don't special case teardown of aliasing_ppgtt Chris Wilson
2017-02-10 19:38 ` [PATCH v2 05/22] drm/i915: Split ggtt/alasing_gtt unbind_vma Chris Wilson
2017-02-10 19:38 ` [PATCH v2 06/22] drm/i915: Convert clflushed pagetables over to WC maps Chris Wilson
2017-02-10 19:38 ` [PATCH v2 07/22] drm/i915: Remove kmap/kunmap wrappers Chris Wilson
2017-02-10 19:38 ` [PATCH v2 08/22] drm/i915: Move allocate_va_range to GTT Chris Wilson
2017-02-10 19:38 ` [PATCH v2 09/22] drm/i915: Always preallocate gen6/7 ppgtt Chris Wilson
2017-02-10 19:38 ` [PATCH v2 10/22] drm/i915: Remove redundant clear of appgtt Chris Wilson
2017-02-10 19:38 ` [PATCH v2 11/22] drm/i915: Tidy gen6_write_pde() Chris Wilson
2017-02-10 19:38 ` [PATCH v2 12/22] drm/i915: Remove bitmap tracking for used-ptes Chris Wilson
2017-02-10 19:38 ` [PATCH v2 13/22] drm/i915: Remove bitmap tracking for used-pdes Chris Wilson
2017-02-10 19:38 ` [PATCH v2 14/22] drm/i915: Remove bitmap tracking for used-pdpes Chris Wilson
2017-02-10 19:38 ` [PATCH v2 15/22] drm/i915: Remove bitmap tracking for used-pml4 Chris Wilson
2017-02-10 19:38 ` [PATCH v2 16/22] drm/i915: Remove superfluous posting reads after clear GGTT Chris Wilson
2017-02-10 19:38 ` [PATCH v2 17/22] drm/i915: Always mark the PDP as dirty when altered Chris Wilson
2017-02-10 19:38 ` [PATCH v2 18/22] drm/i915: Remove defunct GTT tracepoints Chris Wilson
2017-02-10 19:38 ` [PATCH v2 19/22] drm/i915: Remove unused ppgtt->enable() Chris Wilson
2017-02-10 19:38 ` [PATCH v2 20/22] drm/i915: Remove i915_address_space.start Chris Wilson
2017-02-13 14:47   ` Matthew Auld
2017-02-10 19:38 ` [PATCH v2 21/22] drm/i915: Only preallocate the aliasing GTT to the extents of the global GTT Chris Wilson
2017-02-14 15:56   ` Matthew Auld
2017-02-10 19:38 ` [PATCH v2 22/22] drm/i915: Differentiate the aliasing_ppgtt with an invalid filp Chris Wilson
2017-02-13 15:07   ` Matthew Auld
2017-02-14  9:47   ` [PATCH] drm/i915: Use preferred kernel types in i915_gem_gtt.c Chris Wilson
2017-02-14 12:01     ` Joonas Lahtinen
2017-02-14 11:22 ` ✓ Fi.CI.BAT: success for series starting with [v2,01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() (rev3) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87a89qp281.fsf@gaia.fi.intel.com \
    --to=mika.kuoppala@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=matthew.auld@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.