[Intel-gfx] [PATCH 3/3] drm/i915: Refine VT-d scanout workaround

Intel-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed

From: Andi Shyti <andi.shyti@linux.intel.com>
To: intel-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
	"Chris Wilson" <chris@chris-wilson.co.uk>,
	"Matthew Auld" <matthew.auld@intel.com>
Subject: [Intel-gfx] [PATCH 3/3] drm/i915: Refine VT-d scanout workaround
Date: Wed,  9 Nov 2022 18:40:58 +0100	[thread overview]
Message-ID: <20221109174058.912720-4-andi.shyti@linux.intel.com> (raw)
In-Reply-To: <20221109174058.912720-1-andi.shyti@linux.intel.com>

From: Chris Wilson <chris@chris-wilson.co.uk>

VT-d may cause overfetch of the scanout PTE, both before and after the
vma (depending on the scanout orientation). bspec recommends that we
provide a tile-row in either directions, and suggests using 168 PTE,
warning that the accesses will wrap around the ends of the GGTT.
Currently, we fill the entire GGTT with scratch pages when using VT-d to
always ensure there are valid entries around every vma, including
scanout. However, writing every PTE is slow as on recent devices we
perform 8MiB of uncached writes, incurring an extra 100ms during resume.

If instead we focus on only putting guard pages around scanout, we can
avoid touching the whole GGTT. To avoid having to introduce extra nodes
around each scanout vma, we adjust the scanout drm_mm_node to be smaller
than the allocated space, and fixup the extra PTE during dma binding.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Tejas Upadhyay <tejaskumarx.surendrakumar.upadhyay@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 13 +++++++++++
 drivers/gpu/drm/i915/gt/intel_ggtt.c       | 25 +---------------------
 drivers/gpu/drm/i915/i915_vma.c            |  9 ++++++++
 3 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index d44a152ce6800..882b91519f92b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -17,6 +17,8 @@
 #include "i915_gem_object.h"
 #include "i915_vma.h"
 
+#define VTD_GUARD (168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */
+
 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
@@ -424,6 +426,17 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ERR_PTR(ret);
 
+	/* VT-d may overfetch before/after the vma, so pad with scratch */
+	if (intel_scanout_needs_vtd_wa(i915)) {
+		unsigned int guard = VTD_GUARD;
+
+		if (i915_gem_object_is_tiled(obj))
+			guard = max(guard,
+				    i915_gem_object_get_tile_row_size(obj));
+
+		flags |= PIN_OFFSET_GUARD | guard;
+	}
+
 	/*
 	 * As the user may map the buffer once pinned in the display plane
 	 * (e.g. libkms for the bootup splash), we have to ensure that we
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 0dd81b18716c3..b0acb645a6de6 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -367,27 +367,6 @@ static void nop_clear_range(struct i915_address_space *vm,
 {
 }
 
-static void gen8_ggtt_clear_range(struct i915_address_space *vm,
-				  u64 start, u64 length)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
-	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
-	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
-	gen8_pte_t __iomem *gtt_base =
-		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
-	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
-	int i;
-
-	if (WARN(num_entries > max_entries,
-		 "First entry = %d; Num entries = %d (max=%d)\n",
-		 first_entry, num_entries, max_entries))
-		num_entries = max_entries;
-
-	for (i = 0; i < num_entries; i++)
-		gen8_set_pte(&gtt_base[i], scratch_pte);
-}
-
 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
 {
 	/*
@@ -959,8 +938,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	ggtt->vm.cleanup = gen6_gmch_remove;
 	ggtt->vm.insert_page = gen8_ggtt_insert_page;
 	ggtt->vm.clear_range = nop_clear_range;
-	if (intel_scanout_needs_vtd_wa(i915))
-		ggtt->vm.clear_range = gen8_ggtt_clear_range;
 
 	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
 
@@ -1121,7 +1098,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
 	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
 	ggtt->vm.clear_range = nop_clear_range;
-	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
+	if (!HAS_FULL_PPGTT(i915))
 		ggtt->vm.clear_range = gen6_ggtt_clear_range;
 	ggtt->vm.insert_page = gen6_ggtt_insert_page;
 	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index ff24a654565f6..973abe56cbaa0 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -675,6 +675,10 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 	    i915_vma_offset(vma) != (flags & PIN_OFFSET_MASK))
 		return true;
 
+	if (flags & PIN_OFFSET_GUARD &&
+	    vma->guard < (flags & PIN_OFFSET_MASK))
+		return true;
+
 	return false;
 }
 
@@ -753,6 +757,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 
 	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
+	GEM_BUG_ON(hweight64(flags & (PIN_OFFSET_GUARD | PIN_OFFSET_FIXED | PIN_OFFSET_BIAS)) > 1);
 
 	size = max(size, vma->size);
 	alignment = max_t(typeof(alignment), alignment, vma->display_alignment);
@@ -767,6 +772,10 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	GEM_BUG_ON(!is_power_of_2(alignment));
 
 	guard = vma->guard; /* retain guard across rebinds */
+	if (flags & PIN_OFFSET_GUARD) {
+		GEM_BUG_ON(overflows_type(flags & PIN_OFFSET_MASK, u32));
+		guard = max_t(u32, guard, flags & PIN_OFFSET_MASK);
+	}
 	guard = ALIGN(guard, alignment);
 
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
-- 
2.37.2

next prev parent reply	other threads:[~2022-11-09 17:41 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-09 17:40 [Intel-gfx] [PATCH 0/3] add guard padding around i915_vma Andi Shyti
2022-11-09 17:40 ` [Intel-gfx] [PATCH 1/3] drm/i915: Wrap all access to i915_vma.node.start|size Andi Shyti
2022-11-09 17:40 ` [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma Andi Shyti
2022-11-09 17:40 ` Andi Shyti [this message]
2022-11-09 18:03 ` [Intel-gfx] [PATCH 0/3] add guard padding around i915_vma Thomas Hellström
2022-11-09 18:07   ` Andi Shyti
2022-11-10 10:19   ` Tvrtko Ursulin
2022-11-10 10:32   ` Andi Shyti
2022-11-10 19:57 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2022-11-09 16:49 [Intel-gfx] [PATCH 0/3] add guard patting " Andi Shyti
2022-11-09 16:49 ` [Intel-gfx] [PATCH 3/3] drm/i915: Refine VT-d scanout workaround Andi Shyti
2021-12-02  8:31 [Intel-gfx] [PATCH 0/3] Replace VT-d workaround with guard pages Tejas Upadhyay
2021-12-02  8:31 ` [Intel-gfx] [PATCH 3/3] drm/i915: Refine VT-d scanout workaround Tejas Upadhyay
2021-02-15 15:56 [Intel-gfx] [PATCH 1/3] drm/i915: Wrap all access to i915_vma.node.start|size Chris Wilson
2021-02-15 15:56 ` [Intel-gfx] [PATCH 3/3] drm/i915: Refine VT-d scanout workaround Chris Wilson
2021-02-15 18:07   ` Matthew Auld
2021-02-12 10:22 [Intel-gfx] [PATCH 1/3] drm/i915: Wrap all access to i915_vma.node.start|size Chris Wilson
2021-02-12 10:22 ` [Intel-gfx] [PATCH 3/3] drm/i915: Refine VT-d scanout workaround Chris Wilson
2021-02-12 13:46   ` Matthew Auld

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:d44a152ce680 dfblob:882b91519f92 dfblob:0dd81b18716c
dfblob:b0acb645a6de dfblob:ff24a654565f dfblob:973abe56cbaa )
 OR (
bs:"[Intel-gfx] [PATCH 3/3] drm/i915: Refine VT-d scanout workaround" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221109174058.912720-4-andi.shyti@linux.intel.com \
    --to=andi.shyti@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=matthew.auld@intel.com \
    --cc=thomas.hellstrom@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox