All of lore.kernel.org
 help / color / mirror / Atom feed
* ppGTT the recursive wars
@ 2019-07-12 11:27 Chris Wilson
  2019-07-12 11:27 ` [PATCH 1/4] drm/i915/gtt: Recursive cleanup for gen8 Chris Wilson
                   ` (7 more replies)
  0 siblings, 8 replies; 15+ messages in thread
From: Chris Wilson @ 2019-07-12 11:27 UTC (permalink / raw)
  To: intel-gfx

Doing more with less!

We restore and succesfully generalise a recently lost optimisation to
avoid updating page directories to be removed, all while using less i$
and hopefully more predictable branches. Mircoptimisations, but less
code for the win.

We even add some debug traces for the next poor soul to be trapped here.

drivers/gpu/drm/i915/Kconfig.debug  |  15 +
drivers/gpu/drm/i915/i915_gem_gtt.c | 586 +++++++++++++++---------------------
drivers/gpu/drm/i915/i915_gem_gtt.h |  92 +-----
3 files changed, 259 insertions(+), 434 deletions(-)


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/4] drm/i915/gtt: Recursive cleanup for gen8
  2019-07-12 11:27 ppGTT the recursive wars Chris Wilson
@ 2019-07-12 11:27 ` Chris Wilson
  2019-07-12 13:03   ` Abdiel Janulgue
  2019-07-12 11:27 ` [PATCH 2/4] drm/i915/gtt: Recursive ppgtt clear " Chris Wilson
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2019-07-12 11:27 UTC (permalink / raw)
  To: intel-gfx

With an explicit level, we can refactor the separate cleanup functions
as a simple recursive function. We take the opportunity to pass down the
size of each level so that we can deal with the different sizes of
top-level and avoid over allocating for 32/36-bit vm.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 93 ++++++++++-------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  2 +-
 2 files changed, 33 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 753090a7729e..305c65c08a6a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -713,11 +713,11 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 	return pt;
 }
 
-static struct i915_page_directory *__alloc_pd(void)
+static struct i915_page_directory *__alloc_pd(size_t sz)
 {
 	struct i915_page_directory *pd;
 
-	pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
+	pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
 	if (unlikely(!pd))
 		return NULL;
 
@@ -729,7 +729,7 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 {
 	struct i915_page_directory *pd;
 
-	pd = __alloc_pd();
+	pd = __alloc_pd(sizeof(*pd));
 	if (unlikely(!pd))
 		return ERR_PTR(-ENOMEM);
 
@@ -766,7 +766,7 @@ __set_pd_entry(struct i915_page_directory * const pd,
 	       struct i915_page_dma * const to,
 	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
 {
-	GEM_BUG_ON(atomic_read(px_used(pd)) > 512);
+	GEM_BUG_ON(atomic_read(px_used(pd)) > ARRAY_SIZE(pd->entry));
 
 	atomic_inc(px_used(pd));
 	pd->entry[idx] = to;
@@ -896,64 +896,34 @@ static inline unsigned int gen8_pt_count(u64 start, u64 end)
 		return end - start;
 }
 
-static void gen8_free_page_tables(struct i915_address_space *vm,
-				  struct i915_page_directory *pd)
+static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
+				 struct i915_page_directory *pd,
+				 int count, int lvl)
 {
-	int i;
-
-	for (i = 0; i < I915_PDES; i++) {
-		if (pd->entry[i])
-			free_pd(vm, pd->entry[i]);
-	}
-}
-
-static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
-				    struct i915_page_directory *pdp)
-{
-	const unsigned int pdpes = i915_pdpes_per_pdp(vm);
-	int i;
-
-	for (i = 0; i < pdpes; i++) {
-		if (!pdp->entry[i])
-			continue;
-
-		gen8_free_page_tables(vm, pdp->entry[i]);
-		free_pd(vm, pdp->entry[i]);
-	}
-
-	free_px(vm, pdp);
-}
-
-static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
-{
-	struct i915_page_directory * const pml4 = ppgtt->pd;
-	int i;
-
-	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
-		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
+	if (lvl) {
+		void **pde = pd->entry;
 
-		if (!pdp)
-			continue;
+		do {
+			if (!*pde)
+				continue;
 
-		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
+			__gen8_ppgtt_cleanup(vm, *pde, I915_PDES, lvl - 1);
+		} while (pde++, --count);
 	}
 
-	free_px(&ppgtt->vm, pml4);
+	free_px(vm, pd);
 }
 
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 {
-	struct drm_i915_private *i915 = vm->i915;
 	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 
-	if (intel_vgpu_active(i915))
+	if (intel_vgpu_active(vm->i915))
 		gen8_ppgtt_notify_vgt(ppgtt, false);
 
-	if (i915_vm_is_4lvl(vm))
-		gen8_ppgtt_cleanup_4lvl(ppgtt);
-	else
-		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd);
-
+	__gen8_ppgtt_cleanup(vm, ppgtt->pd,
+			     vm->total >> __gen8_pte_shift(vm->top),
+			     vm->top);
 	free_scratch(vm);
 }
 
@@ -1505,24 +1475,18 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 	struct i915_page_directory *pdp = ppgtt->pd;
 	struct i915_page_directory *pd;
 	u64 start = 0, length = ppgtt->vm.total;
-	u64 from = start;
 	unsigned int pdpe;
 
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 		pd = alloc_pd(vm);
 		if (IS_ERR(pd))
-			goto unwind;
+			return PTR_ERR(pd);
 
 		fill_px(pd, vm->scratch[1].encode);
 		set_pd_entry(pdp, pdpe, pd);
 	}
 
 	return 0;
-
-unwind:
-	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
-	atomic_set(px_used(pdp), 0);
-	return -ENOMEM;
 }
 
 static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
@@ -1550,9 +1514,14 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
 
 	GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
 
-	pd = alloc_pd(vm);
-	if (IS_ERR(pd))
-		return pd;
+	pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
+	if (unlikely(!pd))
+		return ERR_PTR(-ENOMEM);
+
+	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
+		kfree(pd);
+		return ERR_PTR(-ENOMEM);
+	}
 
 	fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
 	return pd;
@@ -1625,7 +1594,9 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 	return ppgtt;
 
 err_free_pd:
-	free_px(&ppgtt->vm, ppgtt->pd);
+	__gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
+			     ppgtt->vm.total >> __gen8_pte_shift(ppgtt->vm.top),
+			     ppgtt->vm.top);
 err_free_scratch:
 	free_scratch(&ppgtt->vm);
 err_free:
@@ -2071,7 +2042,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 
 	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
 
-	ppgtt->base.pd = __alloc_pd();
+	ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
 	if (!ppgtt->base.pd) {
 		err = -ENOMEM;
 		goto err_free;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index b30ffe333852..aea9a7084414 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -252,7 +252,7 @@ struct i915_page_table {
 struct i915_page_directory {
 	struct i915_page_table pt;
 	spinlock_t lock;
-	void *entry[512];
+	void *entry[I915_PDES];
 };
 
 #define __px_choose_expr(x, type, expr, other) \
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/4] drm/i915/gtt: Recursive ppgtt clear for gen8
  2019-07-12 11:27 ppGTT the recursive wars Chris Wilson
  2019-07-12 11:27 ` [PATCH 1/4] drm/i915/gtt: Recursive cleanup for gen8 Chris Wilson
@ 2019-07-12 11:27 ` Chris Wilson
  2019-07-12 15:16   ` Abdiel Janulgue
  2019-07-12 11:27 ` [PATCH 3/4] drm/i915/gtt: Recursive ppgtt alloc " Chris Wilson
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2019-07-12 11:27 UTC (permalink / raw)
  To: intel-gfx

With an explicit level, we can refactor the separate clear functions
as a simple recursive function. The additional knowledge of the level
allows us to spot when we can free an entire subtree at once.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Kconfig.debug  |  15 +++
 drivers/gpu/drm/i915/i915_gem_gtt.c | 154 ++++++++++++++++------------
 2 files changed, 105 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 8d922bb4d953..ed8c787058a5 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -94,6 +94,21 @@ config DRM_I915_TRACE_GEM
 
 	  If in doubt, say "N".
 
+config DRM_I915_TRACE_GTT
+	bool "Insert extra ftrace output from the GTT internals"
+	depends on DRM_I915_DEBUG_GEM
+	select TRACING
+	default n
+	help
+	  Enable additional and verbose debugging output that will spam
+	  ordinary tests, but may be vital for post-mortem debugging when
+	  used with /proc/sys/kernel/ftrace_dump_on_oops
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+
 config DRM_I915_SW_FENCE_DEBUG_OBJECTS
         bool "Enable additional driver debugging for fence objects"
         depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 305c65c08a6a..7b2f3188d435 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -46,6 +46,12 @@
 
 #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
 
+#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
+#define DBG(...) trace_printk(__VA_ARGS__)
+#else
+#define DBG(...)
+#endif
+
 /**
  * DOC: Global GTT views
  *
@@ -796,6 +802,9 @@ release_pd_entry(struct i915_page_directory * const pd,
 {
 	bool free = false;
 
+	if (atomic_add_unless(&pt->used, -1, 1))
+		return false;
+
 	spin_lock(&pd->lock);
 	if (atomic_dec_and_test(&pt->used)) {
 		clear_pd_entry(pd, idx, scratch);
@@ -927,86 +936,101 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 	free_scratch(vm);
 }
 
-/* Removes entries from a single page table, releasing it if it's empty.
- * Caller can use the return value to update higher-level entries.
- */
-static void gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
-				struct i915_page_table *pt,
-				u64 start, u64 length)
+static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
+			      struct i915_page_directory * const pd,
+			      u64 start, const u64 end, int lvl)
 {
-	const unsigned int num_entries = gen8_pte_count(start, length);
-	gen8_pte_t *vaddr;
+	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+	unsigned int idx, len;
 
-	vaddr = kmap_atomic_px(pt);
-	memset64(vaddr + gen8_pte_index(start),
-		 vm->scratch[0].encode,
-		 num_entries);
-	kunmap_atomic(vaddr);
+	len = gen8_pd_range(start, end, lvl--, &idx);
+	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n",
+	    __func__, vm, lvl + 1, start, end,
+	    idx, len, atomic_read(px_used(pd)));
+	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
 
-	GEM_BUG_ON(num_entries > atomic_read(&pt->used));
+	do {
+		struct i915_page_table *pt = pd->entry[idx];
+
+		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
+		    gen8_pd_contains(start, end, lvl)) {
+			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
+			    __func__, vm, lvl + 1, idx, start, end);
+			clear_pd_entry(pd, idx, scratch);
+			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
+			start += (u64)I915_PDES << gen8_pd_shift(lvl);
+			continue;
+		}
 
-	atomic_sub(num_entries, &pt->used);
-}
+		if (lvl) {
+			start = __gen8_ppgtt_clear(vm, as_pd(pt),
+						   start, end, lvl);
+		} else {
+			unsigned int count;
+			u64 *vaddr;
 
-static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
-				struct i915_page_directory *pd,
-				u64 start, u64 length)
-{
-	struct i915_page_table *pt;
-	u32 pde;
+			count = gen8_pt_count(start, end);
+			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d} removing pte\n",
+			    __func__, vm, lvl, start, end,
+			    gen8_pd_index(start, 0), count,
+			    atomic_read(&pt->used));
+			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
 
-	gen8_for_each_pde(pt, pd, start, length, pde) {
-		atomic_inc(&pt->used);
-		gen8_ppgtt_clear_pt(vm, pt, start, length);
-		if (release_pd_entry(pd, pde, pt, &vm->scratch[1]))
+			vaddr = kmap_atomic_px(pt);
+			memset64(vaddr + gen8_pd_index(start, 0),
+				 vm->scratch[0].encode,
+				 count);
+			kunmap_atomic(vaddr);
+
+			atomic_sub(count, &pt->used);
+			start += count;
+		}
+
+		if (release_pd_entry(pd, idx, pt, scratch))
 			free_px(vm, pt);
-	}
+	} while (idx++, --len);
+
+	return start;
 }
 
-/* Removes entries from a single page dir pointer, releasing it if it's empty.
- * Caller can use the return value to update higher-level entries
- */
-static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
-				 struct i915_page_directory * const pdp,
-				 u64 start, u64 length)
+static void gen8_ppgtt_clear(struct i915_address_space *vm,
+			     u64 start, u64 length)
 {
-	struct i915_page_directory *pd;
-	unsigned int pdpe;
+	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
 
-	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		atomic_inc(px_used(pd));
-		gen8_ppgtt_clear_pd(vm, pd, start, length);
-		if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
-			free_px(vm, pd);
-	}
+	start >>= GEN8_PTE_SHIFT;
+	length >>= GEN8_PTE_SHIFT;
+	GEM_BUG_ON(length == 0);
+
+	__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
+			   start, start + length, vm->top);
 }
 
-static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
-				  u64 start, u64 length)
+static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
+				struct i915_page_directory *pd,
+				u64 start, u64 length)
 {
-	gen8_ppgtt_clear_pdp(vm, i915_vm_to_ppgtt(vm)->pd, start, length);
+	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
+
+	start >>= GEN8_PTE_SHIFT;
+	length >>= GEN8_PTE_SHIFT;
+
+	__gen8_ppgtt_clear(vm, pd, start, start + length, 1);
 }
 
-/* Removes entries from a single pml4.
- * This is the top-level structure in 4-level page tables used on gen8+.
- * Empty entries are always scratch pml4e.
- */
-static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
-				  u64 start, u64 length)
+static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
+				 struct i915_page_directory * const pdp,
+				 u64 start, u64 length)
 {
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	struct i915_page_directory * const pml4 = ppgtt->pd;
-	struct i915_page_directory *pdp;
-	unsigned int pml4e;
+	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
 
-	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
+	start >>= GEN8_PTE_SHIFT;
+	length >>= GEN8_PTE_SHIFT;
 
-	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		atomic_inc(px_used(pdp));
-		gen8_ppgtt_clear_pdp(vm, pdp, start, length);
-		if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
-			free_px(vm, pdp);
-	}
+	__gen8_ppgtt_clear(vm, pdp, start, start + length, 2);
 }
 
 static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
@@ -1171,7 +1195,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
 		free_px(vm, pdp);
 unwind:
-	gen8_ppgtt_clear_4lvl(vm, from, start - from);
+	gen8_ppgtt_clear(vm, from, start - from);
 out:
 	if (alloc)
 		free_px(vm, alloc);
@@ -1484,6 +1508,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 
 		fill_px(pd, vm->scratch[1].encode);
 		set_pd_entry(pdp, pdpe, pd);
+		atomic_inc(px_used(pd)); /* keep pinned */
 	}
 
 	return 0;
@@ -1524,6 +1549,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
 	}
 
 	fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
+	atomic_inc(px_used(pd)); /* mark as pinned */
 	return pd;
 }
 
@@ -1573,7 +1599,6 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 	if (i915_vm_is_4lvl(&ppgtt->vm)) {
 		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
 		ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
-		ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
 	} else {
 		if (intel_vgpu_active(i915)) {
 			err = gen8_preallocate_top_level_pdp(ppgtt);
@@ -1583,9 +1608,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 
 		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
 		ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
-		ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl;
 	}
 
+	ppgtt->vm.clear_range = gen8_ppgtt_clear;
+
 	if (intel_vgpu_active(i915))
 		gen8_ppgtt_notify_vgt(ppgtt, true);
 
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/4] drm/i915/gtt: Recursive ppgtt alloc for gen8
  2019-07-12 11:27 ppGTT the recursive wars Chris Wilson
  2019-07-12 11:27 ` [PATCH 1/4] drm/i915/gtt: Recursive cleanup for gen8 Chris Wilson
  2019-07-12 11:27 ` [PATCH 2/4] drm/i915/gtt: Recursive ppgtt clear " Chris Wilson
@ 2019-07-12 11:27 ` Chris Wilson
  2019-07-16 15:28   ` Abdiel Janulgue
  2019-07-16 15:28   ` Abdiel Janulgue
  2019-07-12 11:27 ` [PATCH 4/4] drm/i915/gtt: Tidy up ppgtt insertion " Chris Wilson
                   ` (4 subsequent siblings)
  7 siblings, 2 replies; 15+ messages in thread
From: Chris Wilson @ 2019-07-12 11:27 UTC (permalink / raw)
  To: intel-gfx

Refactor the separate allocation routines into a single recursive
function.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 272 ++++++++++------------------
 1 file changed, 97 insertions(+), 175 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7b2f3188d435..72e0f9799a46 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1007,199 +1007,119 @@ static void gen8_ppgtt_clear(struct i915_address_space *vm,
 			   start, start + length, vm->top);
 }
 
-static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
-				struct i915_page_directory *pd,
-				u64 start, u64 length)
-{
-	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
-	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
-
-	start >>= GEN8_PTE_SHIFT;
-	length >>= GEN8_PTE_SHIFT;
-
-	__gen8_ppgtt_clear(vm, pd, start, start + length, 1);
-}
-
-static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
-				 struct i915_page_directory * const pdp,
-				 u64 start, u64 length)
-{
-	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
-	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
-
-	start >>= GEN8_PTE_SHIFT;
-	length >>= GEN8_PTE_SHIFT;
-
-	__gen8_ppgtt_clear(vm, pdp, start, start + length, 2);
-}
-
-static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
-			       struct i915_page_directory *pd,
-			       u64 start, u64 length)
+static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
+			      struct i915_page_directory * const pd,
+			      u64 * const start, u64 end, int lvl)
 {
-	struct i915_page_table *pt, *alloc = NULL;
-	u64 from = start;
-	unsigned int pde;
+	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+	struct i915_page_table *alloc = NULL;
+	unsigned int idx, len;
 	int ret = 0;
 
+	len = gen8_pd_range(*start, end, lvl--, &idx);
+	DBG("%s(%p):{lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n",
+	    __func__, vm, lvl + 1, *start, end,
+	    idx, len, atomic_read(px_used(pd)));
+	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
+
 	spin_lock(&pd->lock);
-	gen8_for_each_pde(pt, pd, start, length, pde) {
-		const int count = gen8_pte_count(start, length);
+	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
+	do {
+		struct i915_page_table *pt = pd->entry[idx];
 
 		if (!pt) {
 			spin_unlock(&pd->lock);
 
-			pt = fetch_and_zero(&alloc);
-			if (!pt)
-				pt = alloc_pt(vm);
-			if (IS_ERR(pt)) {
-				ret = PTR_ERR(pt);
-				goto unwind;
-			}
+			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
+			    __func__, vm, lvl + 1, idx);
 
-			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
-				fill_px(pt, vm->scratch[0].encode);
+			pt = fetch_and_zero(&alloc);
+			if (lvl) {
+				if (!pt) {
+					pt = &alloc_pd(vm)->pt;
+					if (IS_ERR(pt)) {
+						ret = PTR_ERR(pt);
+						goto out;
+					}
+				}
 
-			spin_lock(&pd->lock);
-			if (!pd->entry[pde]) {
-				set_pd_entry(pd, pde, pt);
+				fill_px(pt, vm->scratch[lvl].encode);
 			} else {
-				alloc = pt;
-				pt = pd->entry[pde];
-			}
-		}
-
-		atomic_add(count, &pt->used);
-	}
-	spin_unlock(&pd->lock);
-	goto out;
-
-unwind:
-	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
-out:
-	if (alloc)
-		free_px(vm, alloc);
-	return ret;
-}
-
-static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
-				struct i915_page_directory *pdp,
-				u64 start, u64 length)
-{
-	struct i915_page_directory *pd, *alloc = NULL;
-	u64 from = start;
-	unsigned int pdpe;
-	int ret = 0;
+				if (!pt) {
+					pt = alloc_pt(vm);
+					if (IS_ERR(pt)) {
+						ret = PTR_ERR(pt);
+						goto out;
+					}
+				}
 
-	spin_lock(&pdp->lock);
-	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		if (!pd) {
-			spin_unlock(&pdp->lock);
-
-			pd = fetch_and_zero(&alloc);
-			if (!pd)
-				pd = alloc_pd(vm);
-			if (IS_ERR(pd)) {
-				ret = PTR_ERR(pd);
-				goto unwind;
+				if (intel_vgpu_active(vm->i915) ||
+				    gen8_pt_count(*start, end) < I915_PDES)
+					fill_px(pt, vm->scratch[lvl].encode);
 			}
 
-			fill_px(pd, vm->scratch[1].encode);
+			spin_lock(&pd->lock);
+			if (likely(!pd->entry[idx]))
+				set_pd_entry(pd, idx, pt);
+			else
+				alloc = pt, pt = pd->entry[idx];
+		}
 
-			spin_lock(&pdp->lock);
-			if (!pdp->entry[pdpe]) {
-				set_pd_entry(pdp, pdpe, pd);
-			} else {
-				alloc = pd;
-				pd = pdp->entry[pdpe];
+		if (lvl) {
+			atomic_inc(&pt->used);
+			spin_unlock(&pd->lock);
+
+			ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
+						 start, end, lvl);
+			if (unlikely(ret)) {
+				if (release_pd_entry(pd, idx, pt, scratch))
+					free_px(vm, pt);
+				goto out;
 			}
-		}
-		atomic_inc(px_used(pd));
-		spin_unlock(&pdp->lock);
 
-		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
-		if (unlikely(ret))
-			goto unwind_pd;
+			spin_lock(&pd->lock);
+			atomic_dec(&pt->used);
+			GEM_BUG_ON(!atomic_read(&pt->used));
+		} else {
+			unsigned int count = gen8_pt_count(*start, end);
 
-		spin_lock(&pdp->lock);
-		atomic_dec(px_used(pd));
-	}
-	spin_unlock(&pdp->lock);
-	goto out;
+			DBG("%s(%p):{lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d} inserting pte\n",
+			    __func__, vm, lvl, *start, end,
+			    gen8_pd_index(*start, 0), count,
+			    atomic_read(&pt->used));
 
-unwind_pd:
-	if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
-		free_px(vm, pd);
-unwind:
-	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
+			atomic_add(count, &pt->used);
+			GEM_BUG_ON(atomic_read(&pt->used) > I915_PDES);
+			*start += count;
+		}
+	} while (idx++, --len);
+	spin_unlock(&pd->lock);
 out:
 	if (alloc)
 		free_px(vm, alloc);
 	return ret;
 }
 
-static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
-				 u64 start, u64 length)
-{
-	return gen8_ppgtt_alloc_pdp(vm,
-				    i915_vm_to_ppgtt(vm)->pd, start, length);
-}
-
-static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
-				 u64 start, u64 length)
+static int gen8_ppgtt_alloc(struct i915_address_space *vm,
+			    u64 start, u64 length)
 {
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	struct i915_page_directory * const pml4 = ppgtt->pd;
-	struct i915_page_directory *pdp, *alloc = NULL;
 	u64 from = start;
-	int ret = 0;
-	u32 pml4e;
-
-	spin_lock(&pml4->lock);
-	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		if (!pdp) {
-			spin_unlock(&pml4->lock);
-
-			pdp = fetch_and_zero(&alloc);
-			if (!pdp)
-				pdp = alloc_pd(vm);
-			if (IS_ERR(pdp)) {
-				ret = PTR_ERR(pdp);
-				goto unwind;
-			}
-
-			fill_px(pdp, vm->scratch[2].encode);
+	int err;
 
-			spin_lock(&pml4->lock);
-			if (!pml4->entry[pml4e]) {
-				set_pd_entry(pml4, pml4e, pdp);
-			} else {
-				alloc = pdp;
-				pdp = pml4->entry[pml4e];
-			}
-		}
-		atomic_inc(px_used(pdp));
-		spin_unlock(&pml4->lock);
+	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
 
-		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
-		if (unlikely(ret))
-			goto unwind_pdp;
+	start >>= GEN8_PTE_SHIFT;
+	length >>= GEN8_PTE_SHIFT;
+	GEM_BUG_ON(length == 0);
 
-		spin_lock(&pml4->lock);
-		atomic_dec(px_used(pdp));
-	}
-	spin_unlock(&pml4->lock);
-	goto out;
+	err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
+				 &start, start + length, vm->top);
+	if (unlikely(err))
+		__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
+				   from, start, vm->top);
 
-unwind_pdp:
-	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
-		free_px(vm, pdp);
-unwind:
-	gen8_ppgtt_clear(vm, from, start - from);
-out:
-	if (alloc)
-		free_px(vm, alloc);
-	return ret;
+	return err;
 }
 
 static inline struct sgt_dma {
@@ -1496,19 +1416,22 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 {
 	struct i915_address_space *vm = &ppgtt->vm;
-	struct i915_page_directory *pdp = ppgtt->pd;
-	struct i915_page_directory *pd;
-	u64 start = 0, length = ppgtt->vm.total;
-	unsigned int pdpe;
+	struct i915_page_directory *pd = ppgtt->pd;
+	unsigned int idx;
+
+	GEM_BUG_ON(vm->top != 2);
+	GEM_BUG_ON((vm->total >> __gen8_pte_shift(2)) != GEN8_3LVL_PDPES);
+
+	for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
+		struct i915_page_directory *pde;
 
-	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		pd = alloc_pd(vm);
-		if (IS_ERR(pd))
-			return PTR_ERR(pd);
+		pde = alloc_pd(vm);
+		if (IS_ERR(pde))
+			return PTR_ERR(pde);
 
-		fill_px(pd, vm->scratch[1].encode);
-		set_pd_entry(pdp, pdpe, pd);
-		atomic_inc(px_used(pd)); /* keep pinned */
+		fill_px(pde, vm->scratch[1].encode);
+		set_pd_entry(pd, idx, pde);
+		atomic_inc(px_used(pde)); /* keep pinned */
 	}
 
 	return 0;
@@ -1597,7 +1520,6 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 	}
 
 	if (i915_vm_is_4lvl(&ppgtt->vm)) {
-		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
 		ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
 	} else {
 		if (intel_vgpu_active(i915)) {
@@ -1606,10 +1528,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 				goto err_free_pd;
 		}
 
-		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
 		ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
 	}
 
+	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
 	ppgtt->vm.clear_range = gen8_ppgtt_clear;
 
 	if (intel_vgpu_active(i915))
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 4/4] drm/i915/gtt: Tidy up ppgtt insertion for gen8
  2019-07-12 11:27 ppGTT the recursive wars Chris Wilson
                   ` (2 preceding siblings ...)
  2019-07-12 11:27 ` [PATCH 3/4] drm/i915/gtt: Recursive ppgtt alloc " Chris Wilson
@ 2019-07-12 11:27 ` Chris Wilson
  2019-07-16 15:30   ` Abdiel Janulgue
  2019-07-12 15:32 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/4] drm/i915/gtt: Recursive cleanup " Patchwork
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2019-07-12 11:27 UTC (permalink / raw)
  To: intel-gfx

Apply the new radix shift helpers to extract the multi-level indices
cleanly when inserting pte into the gtt tree.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++-----------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  90 ++--------------------
 2 files changed, 48 insertions(+), 157 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 72e0f9799a46..de78dc8c425c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1131,47 +1131,28 @@ static inline struct sgt_dma {
 	return (struct sgt_dma) { sg, addr, addr + sg->length };
 }
 
-struct gen8_insert_pte {
-	u16 pml4e;
-	u16 pdpe;
-	u16 pde;
-	u16 pte;
-};
-
-static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
-{
-	return (struct gen8_insert_pte) {
-		 gen8_pml4e_index(start),
-		 gen8_pdpe_index(start),
-		 gen8_pde_index(start),
-		 gen8_pte_index(start),
-	};
-}
-
-static __always_inline bool
+static __always_inline u64
 gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
 			      struct i915_page_directory *pdp,
 			      struct sgt_dma *iter,
-			      struct gen8_insert_pte *idx,
+			      u64 idx,
 			      enum i915_cache_level cache_level,
 			      u32 flags)
 {
 	struct i915_page_directory *pd;
 	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
 	gen8_pte_t *vaddr;
-	bool ret;
 
-	GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
-	pd = i915_pd_entry(pdp, idx->pdpe);
-	vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
+	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
+	vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
 	do {
-		vaddr[idx->pte] = pte_encode | iter->dma;
+		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
 
 		iter->dma += I915_GTT_PAGE_SIZE;
 		if (iter->dma >= iter->max) {
 			iter->sg = __sg_next(iter->sg);
 			if (!iter->sg) {
-				ret = false;
+				idx = 0;
 				break;
 			}
 
@@ -1179,30 +1160,22 @@ gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
 			iter->max = iter->dma + iter->sg->length;
 		}
 
-		if (++idx->pte == GEN8_PTES) {
-			idx->pte = 0;
-
-			if (++idx->pde == I915_PDES) {
-				idx->pde = 0;
-
+		if (gen8_pd_index(++idx, 0) == 0) {
+			if (gen8_pd_index(idx, 1) == 0) {
 				/* Limited by sg length for 3lvl */
-				if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
-					idx->pdpe = 0;
-					ret = true;
+				if (gen8_pd_index(idx, 2) == 0)
 					break;
-				}
 
-				GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
-				pd = pdp->entry[idx->pdpe];
+				pd = pdp->entry[gen8_pd_index(idx, 2)];
 			}
 
 			kunmap_atomic(vaddr);
-			vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
+			vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
 		}
 	} while (1);
 	kunmap_atomic(vaddr);
 
-	return ret;
+	return idx;
 }
 
 static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
@@ -1212,9 +1185,9 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
 {
 	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct sgt_dma iter = sgt_dma(vma);
-	struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
 
-	gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter, &idx,
+	gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter,
+				      vma->node.start >> GEN8_PTE_SHIFT,
 				      cache_level, flags);
 
 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
@@ -1231,39 +1204,38 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 	dma_addr_t rem = iter->sg->length;
 
 	do {
-		struct gen8_insert_pte idx = gen8_insert_pte(start);
 		struct i915_page_directory *pdp =
-			i915_pdp_entry(pml4, idx.pml4e);
-		struct i915_page_directory *pd = i915_pd_entry(pdp, idx.pdpe);
-		unsigned int page_size;
-		bool maybe_64K = false;
+			i915_pd_entry(pml4, __gen8_pte_index(start, 3));
+		struct i915_page_directory *pd =
+			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
 		gen8_pte_t encode = pte_encode;
+		unsigned int maybe_64K = -1;
+		unsigned int page_size;
 		gen8_pte_t *vaddr;
-		u16 index, max;
+		u16 index;
 
 		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
 		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
-		    rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
-			index = idx.pde;
-			max = I915_PDES;
-			page_size = I915_GTT_PAGE_SIZE_2M;
-
+		    rem >= I915_GTT_PAGE_SIZE_2M &&
+		    !__gen8_pte_index(start, 0)) {
+			index = __gen8_pte_index(start, 1);
 			encode |= GEN8_PDE_PS_2M;
+			page_size = I915_GTT_PAGE_SIZE_2M;
 
 			vaddr = kmap_atomic_px(pd);
 		} else {
-			struct i915_page_table *pt = i915_pt_entry(pd, idx.pde);
+			struct i915_page_table *pt =
+				i915_pt_entry(pd, __gen8_pte_index(start, 1));
 
-			index = idx.pte;
-			max = GEN8_PTES;
+			index = __gen8_pte_index(start, 0);
 			page_size = I915_GTT_PAGE_SIZE;
 
 			if (!index &&
 			    vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
 			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
 			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
-			     rem >= (max - index) * I915_GTT_PAGE_SIZE))
-				maybe_64K = true;
+			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
+				maybe_64K = __gen8_pte_index(start, 1);
 
 			vaddr = kmap_atomic_px(pt);
 		}
@@ -1284,16 +1256,16 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 				iter->dma = sg_dma_address(iter->sg);
 				iter->max = iter->dma + rem;
 
-				if (maybe_64K && index < max &&
+				if (maybe_64K != -1 && index < I915_PDES &&
 				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
 				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
-				       rem >= (max - index) * I915_GTT_PAGE_SIZE)))
-					maybe_64K = false;
+				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
+					maybe_64K = -1;
 
 				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
 					break;
 			}
-		} while (rem >= page_size && index < max);
+		} while (rem >= page_size && index < I915_PDES);
 
 		kunmap_atomic(vaddr);
 
@@ -1303,14 +1275,14 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 		 * it and have reached the end of the sg table and we have
 		 * enough padding.
 		 */
-		if (maybe_64K &&
-		    (index == max ||
+		if (maybe_64K != -1 &&
+		    (index == I915_PDES ||
 		     (i915_vm_has_scratch_64K(vma->vm) &&
 		      !iter->sg && IS_ALIGNED(vma->node.start +
 					      vma->node.size,
 					      I915_GTT_PAGE_SIZE_2M)))) {
 			vaddr = kmap_atomic_px(pd);
-			vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
+			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
 			kunmap_atomic(vaddr);
 			page_size = I915_GTT_PAGE_SIZE_64K;
 
@@ -1327,8 +1299,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 				u16 i;
 
 				encode = vma->vm->scratch[0].encode;
-				vaddr = kmap_atomic_px(i915_pt_entry(pd,
-								     idx.pde));
+				vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
 
 				for (i = 1; i < index; i += 16)
 					memset64(vaddr + i, encode, 15);
@@ -1354,13 +1325,13 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
 		gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level,
 					       flags);
 	} else {
-		struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
+		u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
 
-		while (gen8_ppgtt_insert_pte_entries(ppgtt,
-						     i915_pdp_entry(pml4, idx.pml4e++),
-						     &iter, &idx, cache_level,
-						     flags))
-			GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
+		while ((idx = gen8_ppgtt_insert_pte_entries(ppgtt,
+							    i915_pd_entry(pml4, gen8_pd_index(idx, 3)),
+							    &iter, idx, cache_level,
+							    flags)))
+			;
 
 		vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index aea9a7084414..6d6e1f4015b9 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -115,29 +115,18 @@ typedef u64 gen8_pte_t;
 #define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
 #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
 
-/* GEN8 32b style address is defined as a 3 level page table:
+/*
+ * GEN8 32b style address is defined as a 3 level page table:
  * 31:30 | 29:21 | 20:12 |  11:0
  * PDPE  |  PDE  |  PTE  | offset
  * The difference as compared to normal x86 3 level page table is the PDPEs are
  * programmed via register.
- */
-#define GEN8_3LVL_PDPES			4
-#define GEN8_PDE_SHIFT			21
-#define GEN8_PDE_MASK			0x1ff
-#define GEN8_PTE_MASK			0x1ff
-#define GEN8_PTES			I915_PTES(sizeof(gen8_pte_t))
-
-/* GEN8 48b style address is defined as a 4 level page table:
+ *
+ * GEN8 48b style address is defined as a 4 level page table:
  * 47:39 | 38:30 | 29:21 | 20:12 |  11:0
  * PML4E | PDPE  |  PDE  |  PTE  | offset
  */
-#define GEN8_PML4ES_PER_PML4		512
-#define GEN8_PML4E_SHIFT		39
-#define GEN8_PML4E_MASK			(GEN8_PML4ES_PER_PML4 - 1)
-#define GEN8_PDPE_SHIFT			30
-/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
- * tables */
-#define GEN8_PDPE_MASK			0x1ff
+#define GEN8_3LVL_PDPES			4
 
 #define PPAT_UNCACHED			(_PAGE_PWT | _PAGE_PCD)
 #define PPAT_CACHED_PDE			0 /* WB LLC */
@@ -521,15 +510,6 @@ static inline u32 gen6_pde_index(u32 addr)
 	return i915_pde_index(addr, GEN6_PDE_SHIFT);
 }
 
-static inline unsigned int
-i915_pdpes_per_pdp(const struct i915_address_space *vm)
-{
-	if (i915_vm_is_4lvl(vm))
-		return GEN8_PML4ES_PER_PML4;
-
-	return GEN8_3LVL_PDPES;
-}
-
 static inline struct i915_page_table *
 i915_pt_entry(const struct i915_page_directory * const pd,
 	      const unsigned short n)
@@ -544,66 +524,6 @@ i915_pd_entry(const struct i915_page_directory * const pdp,
 	return pdp->entry[n];
 }
 
-static inline struct i915_page_directory *
-i915_pdp_entry(const struct i915_page_directory * const pml4,
-	       const unsigned short n)
-{
-	return pml4->entry[n];
-}
-
-/* Equivalent to the gen6 version, For each pde iterates over every pde
- * between from start until start + length. On gen8+ it simply iterates
- * over every page directory entry in a page directory.
- */
-#define gen8_for_each_pde(pt, pd, start, length, iter)			\
-	for (iter = gen8_pde_index(start);				\
-	     length > 0 && iter < I915_PDES &&				\
-		     (pt = i915_pt_entry(pd, iter), true);		\
-	     ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT);		\
-		    temp = min(temp - start, length);			\
-		    start += temp, length -= temp; }), ++iter)
-
-#define gen8_for_each_pdpe(pd, pdp, start, length, iter)		\
-	for (iter = gen8_pdpe_index(start);				\
-	     length > 0 && iter < i915_pdpes_per_pdp(vm) &&		\
-		     (pd = i915_pd_entry(pdp, iter), true);		\
-	     ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT);	\
-		    temp = min(temp - start, length);			\
-		    start += temp, length -= temp; }), ++iter)
-
-#define gen8_for_each_pml4e(pdp, pml4, start, length, iter)		\
-	for (iter = gen8_pml4e_index(start);				\
-	     length > 0 && iter < GEN8_PML4ES_PER_PML4 &&		\
-		     (pdp = i915_pdp_entry(pml4, iter), true);		\
-	     ({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT);	\
-		    temp = min(temp - start, length);			\
-		    start += temp, length -= temp; }), ++iter)
-
-static inline u32 gen8_pte_index(u64 address)
-{
-	return i915_pte_index(address, GEN8_PDE_SHIFT);
-}
-
-static inline u32 gen8_pde_index(u64 address)
-{
-	return i915_pde_index(address, GEN8_PDE_SHIFT);
-}
-
-static inline u32 gen8_pdpe_index(u64 address)
-{
-	return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
-}
-
-static inline u32 gen8_pml4e_index(u64 address)
-{
-	return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
-}
-
-static inline u64 gen8_pte_count(u64 address, u64 length)
-{
-	return i915_pte_count(address, length, GEN8_PDE_SHIFT);
-}
-
 static inline dma_addr_t
 i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
 {
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/4] drm/i915/gtt: Recursive cleanup for gen8
  2019-07-12 11:27 ` [PATCH 1/4] drm/i915/gtt: Recursive cleanup for gen8 Chris Wilson
@ 2019-07-12 13:03   ` Abdiel Janulgue
  0 siblings, 0 replies; 15+ messages in thread
From: Abdiel Janulgue @ 2019-07-12 13:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx



On 12/07/2019 14.27, Chris Wilson wrote:
> With an explicit level, we can refactor the separate cleanup functions
> as a simple recursive function. We take the opportunity to pass down the
> size of each level so that we can deal with the different sizes of
> top-level and avoid over allocating for 32/36-bit vm.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---

Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>

Regards,
Abdiel


>  drivers/gpu/drm/i915/i915_gem_gtt.c | 93 ++++++++++-------------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  2 +-
>  2 files changed, 33 insertions(+), 62 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 753090a7729e..305c65c08a6a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -713,11 +713,11 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
>  	return pt;
>  }
>  
> -static struct i915_page_directory *__alloc_pd(void)
> +static struct i915_page_directory *__alloc_pd(size_t sz)
>  {
>  	struct i915_page_directory *pd;
>  
> -	pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
> +	pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
>  	if (unlikely(!pd))
>  		return NULL;
>  
> @@ -729,7 +729,7 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
>  {
>  	struct i915_page_directory *pd;
>  
> -	pd = __alloc_pd();
> +	pd = __alloc_pd(sizeof(*pd));
>  	if (unlikely(!pd))
>  		return ERR_PTR(-ENOMEM);
>  
> @@ -766,7 +766,7 @@ __set_pd_entry(struct i915_page_directory * const pd,
>  	       struct i915_page_dma * const to,
>  	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
>  {
> -	GEM_BUG_ON(atomic_read(px_used(pd)) > 512);
> +	GEM_BUG_ON(atomic_read(px_used(pd)) > ARRAY_SIZE(pd->entry));
>  
>  	atomic_inc(px_used(pd));
>  	pd->entry[idx] = to;
> @@ -896,64 +896,34 @@ static inline unsigned int gen8_pt_count(u64 start, u64 end)
>  		return end - start;
>  }
>  
> -static void gen8_free_page_tables(struct i915_address_space *vm,
> -				  struct i915_page_directory *pd)
> +static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
> +				 struct i915_page_directory *pd,
> +				 int count, int lvl)
>  {
> -	int i;
> -
> -	for (i = 0; i < I915_PDES; i++) {
> -		if (pd->entry[i])
> -			free_pd(vm, pd->entry[i]);
> -	}
> -}
> -
> -static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
> -				    struct i915_page_directory *pdp)
> -{
> -	const unsigned int pdpes = i915_pdpes_per_pdp(vm);
> -	int i;
> -
> -	for (i = 0; i < pdpes; i++) {
> -		if (!pdp->entry[i])
> -			continue;
> -
> -		gen8_free_page_tables(vm, pdp->entry[i]);
> -		free_pd(vm, pdp->entry[i]);
> -	}
> -
> -	free_px(vm, pdp);
> -}
> -
> -static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
> -{
> -	struct i915_page_directory * const pml4 = ppgtt->pd;
> -	int i;
> -
> -	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
> -		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
> +	if (lvl) {
> +		void **pde = pd->entry;
>  
> -		if (!pdp)
> -			continue;
> +		do {
> +			if (!*pde)
> +				continue;
>  
> -		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
> +			__gen8_ppgtt_cleanup(vm, *pde, I915_PDES, lvl - 1);
> +		} while (pde++, --count);
>  	}
>  
> -	free_px(&ppgtt->vm, pml4);
> +	free_px(vm, pd);
>  }
>  
>  static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
>  {
> -	struct drm_i915_private *i915 = vm->i915;
>  	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  
> -	if (intel_vgpu_active(i915))
> +	if (intel_vgpu_active(vm->i915))
>  		gen8_ppgtt_notify_vgt(ppgtt, false);
>  
> -	if (i915_vm_is_4lvl(vm))
> -		gen8_ppgtt_cleanup_4lvl(ppgtt);
> -	else
> -		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd);
> -
> +	__gen8_ppgtt_cleanup(vm, ppgtt->pd,
> +			     vm->total >> __gen8_pte_shift(vm->top),
> +			     vm->top);
>  	free_scratch(vm);
>  }
>  
> @@ -1505,24 +1475,18 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
>  	struct i915_page_directory *pdp = ppgtt->pd;
>  	struct i915_page_directory *pd;
>  	u64 start = 0, length = ppgtt->vm.total;
> -	u64 from = start;
>  	unsigned int pdpe;
>  
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
>  		pd = alloc_pd(vm);
>  		if (IS_ERR(pd))
> -			goto unwind;
> +			return PTR_ERR(pd);
>  
>  		fill_px(pd, vm->scratch[1].encode);
>  		set_pd_entry(pdp, pdpe, pd);
>  	}
>  
>  	return 0;
> -
> -unwind:
> -	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
> -	atomic_set(px_used(pdp), 0);
> -	return -ENOMEM;
>  }
>  
>  static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
> @@ -1550,9 +1514,14 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
>  
>  	GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
>  
> -	pd = alloc_pd(vm);
> -	if (IS_ERR(pd))
> -		return pd;
> +	pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
> +	if (unlikely(!pd))
> +		return ERR_PTR(-ENOMEM);
> +
> +	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
> +		kfree(pd);
> +		return ERR_PTR(-ENOMEM);
> +	}
>  
>  	fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
>  	return pd;
> @@ -1625,7 +1594,9 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>  	return ppgtt;
>  
>  err_free_pd:
> -	free_px(&ppgtt->vm, ppgtt->pd);
> +	__gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
> +			     ppgtt->vm.total >> __gen8_pte_shift(ppgtt->vm.top),
> +			     ppgtt->vm.top);
>  err_free_scratch:
>  	free_scratch(&ppgtt->vm);
>  err_free:
> @@ -2071,7 +2042,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
>  
>  	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
>  
> -	ppgtt->base.pd = __alloc_pd();
> +	ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
>  	if (!ppgtt->base.pd) {
>  		err = -ENOMEM;
>  		goto err_free;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index b30ffe333852..aea9a7084414 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -252,7 +252,7 @@ struct i915_page_table {
>  struct i915_page_directory {
>  	struct i915_page_table pt;
>  	spinlock_t lock;
> -	void *entry[512];
> +	void *entry[I915_PDES];
>  };
>  
>  #define __px_choose_expr(x, type, expr, other) \
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/4] drm/i915/gtt: Recursive ppgtt clear for gen8
  2019-07-12 11:27 ` [PATCH 2/4] drm/i915/gtt: Recursive ppgtt clear " Chris Wilson
@ 2019-07-12 15:16   ` Abdiel Janulgue
  0 siblings, 0 replies; 15+ messages in thread
From: Abdiel Janulgue @ 2019-07-12 15:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx



On 12/07/2019 14.27, Chris Wilson wrote:
> With an explicit level, we can refactor the separate clear functions
> as a simple recursive function. The additional knowledge of the level
> allows us to spot when we can free an entire subtree at once.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---

Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>

>  drivers/gpu/drm/i915/Kconfig.debug  |  15 +++
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 154 ++++++++++++++++------------
>  2 files changed, 105 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
> index 8d922bb4d953..ed8c787058a5 100644
> --- a/drivers/gpu/drm/i915/Kconfig.debug
> +++ b/drivers/gpu/drm/i915/Kconfig.debug
> @@ -94,6 +94,21 @@ config DRM_I915_TRACE_GEM
>  
>  	  If in doubt, say "N".
>  
> +config DRM_I915_TRACE_GTT
> +	bool "Insert extra ftrace output from the GTT internals"
> +	depends on DRM_I915_DEBUG_GEM
> +	select TRACING
> +	default n
> +	help
> +	  Enable additional and verbose debugging output that will spam
> +	  ordinary tests, but may be vital for post-mortem debugging when
> +	  used with /proc/sys/kernel/ftrace_dump_on_oops
> +
> +	  Recommended for driver developers only.
> +
> +	  If in doubt, say "N".
> +
> +
>  config DRM_I915_SW_FENCE_DEBUG_OBJECTS
>          bool "Enable additional driver debugging for fence objects"
>          depends on DRM_I915
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 305c65c08a6a..7b2f3188d435 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -46,6 +46,12 @@
>  
>  #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
>  
> +#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
> +#define DBG(...) trace_printk(__VA_ARGS__)
> +#else
> +#define DBG(...)
> +#endif
> +
>  /**
>   * DOC: Global GTT views
>   *
> @@ -796,6 +802,9 @@ release_pd_entry(struct i915_page_directory * const pd,
>  {
>  	bool free = false;
>  
> +	if (atomic_add_unless(&pt->used, -1, 1))
> +		return false;
> +
>  	spin_lock(&pd->lock);
>  	if (atomic_dec_and_test(&pt->used)) {
>  		clear_pd_entry(pd, idx, scratch);
> @@ -927,86 +936,101 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
>  	free_scratch(vm);
>  }
>  
> -/* Removes entries from a single page table, releasing it if it's empty.
> - * Caller can use the return value to update higher-level entries.
> - */
> -static void gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
> -				struct i915_page_table *pt,
> -				u64 start, u64 length)
> +static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
> +			      struct i915_page_directory * const pd,
> +			      u64 start, const u64 end, int lvl)
>  {
> -	const unsigned int num_entries = gen8_pte_count(start, length);
> -	gen8_pte_t *vaddr;
> +	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
> +	unsigned int idx, len;
>  
> -	vaddr = kmap_atomic_px(pt);
> -	memset64(vaddr + gen8_pte_index(start),
> -		 vm->scratch[0].encode,
> -		 num_entries);
> -	kunmap_atomic(vaddr);
> +	len = gen8_pd_range(start, end, lvl--, &idx);
> +	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n",
> +	    __func__, vm, lvl + 1, start, end,
> +	    idx, len, atomic_read(px_used(pd)));
> +	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
>  
> -	GEM_BUG_ON(num_entries > atomic_read(&pt->used));
> +	do {
> +		struct i915_page_table *pt = pd->entry[idx];
> +
> +		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
> +		    gen8_pd_contains(start, end, lvl)) {
> +			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
> +			    __func__, vm, lvl + 1, idx, start, end);
> +			clear_pd_entry(pd, idx, scratch);
> +			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
> +			start += (u64)I915_PDES << gen8_pd_shift(lvl);
> +			continue;
> +		}
>  
> -	atomic_sub(num_entries, &pt->used);
> -}
> +		if (lvl) {
> +			start = __gen8_ppgtt_clear(vm, as_pd(pt),
> +						   start, end, lvl);
> +		} else {
> +			unsigned int count;
> +			u64 *vaddr;
>  
> -static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
> -				struct i915_page_directory *pd,
> -				u64 start, u64 length)
> -{
> -	struct i915_page_table *pt;
> -	u32 pde;
> +			count = gen8_pt_count(start, end);
> +			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d} removing pte\n",
> +			    __func__, vm, lvl, start, end,
> +			    gen8_pd_index(start, 0), count,
> +			    atomic_read(&pt->used));
> +			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
>  
> -	gen8_for_each_pde(pt, pd, start, length, pde) {
> -		atomic_inc(&pt->used);
> -		gen8_ppgtt_clear_pt(vm, pt, start, length);
> -		if (release_pd_entry(pd, pde, pt, &vm->scratch[1]))
> +			vaddr = kmap_atomic_px(pt);
> +			memset64(vaddr + gen8_pd_index(start, 0),
> +				 vm->scratch[0].encode,
> +				 count);
> +			kunmap_atomic(vaddr);
> +
> +			atomic_sub(count, &pt->used);
> +			start += count;
> +		}
> +
> +		if (release_pd_entry(pd, idx, pt, scratch))
>  			free_px(vm, pt);
> -	}
> +	} while (idx++, --len);
> +
> +	return start;
>  }
>  
> -/* Removes entries from a single page dir pointer, releasing it if it's empty.
> - * Caller can use the return value to update higher-level entries
> - */
> -static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
> -				 struct i915_page_directory * const pdp,
> -				 u64 start, u64 length)
> +static void gen8_ppgtt_clear(struct i915_address_space *vm,
> +			     u64 start, u64 length)
>  {
> -	struct i915_page_directory *pd;
> -	unsigned int pdpe;
> +	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
> +	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
>  
> -	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		atomic_inc(px_used(pd));
> -		gen8_ppgtt_clear_pd(vm, pd, start, length);
> -		if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
> -			free_px(vm, pd);
> -	}
> +	start >>= GEN8_PTE_SHIFT;
> +	length >>= GEN8_PTE_SHIFT;
> +	GEM_BUG_ON(length == 0);
> +
> +	__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
> +			   start, start + length, vm->top);
>  }
>  
> -static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
> -				  u64 start, u64 length)
> +static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
> +				struct i915_page_directory *pd,
> +				u64 start, u64 length)
>  {
> -	gen8_ppgtt_clear_pdp(vm, i915_vm_to_ppgtt(vm)->pd, start, length);
> +	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
> +	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
> +
> +	start >>= GEN8_PTE_SHIFT;
> +	length >>= GEN8_PTE_SHIFT;
> +
> +	__gen8_ppgtt_clear(vm, pd, start, start + length, 1);
>  }
>  
> -/* Removes entries from a single pml4.
> - * This is the top-level structure in 4-level page tables used on gen8+.
> - * Empty entries are always scratch pml4e.
> - */
> -static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
> -				  u64 start, u64 length)
> +static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
> +				 struct i915_page_directory * const pdp,
> +				 u64 start, u64 length)
>  {
> -	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> -	struct i915_page_directory * const pml4 = ppgtt->pd;
> -	struct i915_page_directory *pdp;
> -	unsigned int pml4e;
> +	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
> +	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
>  
> -	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
> +	start >>= GEN8_PTE_SHIFT;
> +	length >>= GEN8_PTE_SHIFT;
>  
> -	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -		atomic_inc(px_used(pdp));
> -		gen8_ppgtt_clear_pdp(vm, pdp, start, length);
> -		if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
> -			free_px(vm, pdp);
> -	}
> +	__gen8_ppgtt_clear(vm, pdp, start, start + length, 2);
>  }
>  
>  static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
> @@ -1171,7 +1195,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
>  		free_px(vm, pdp);
>  unwind:
> -	gen8_ppgtt_clear_4lvl(vm, from, start - from);
> +	gen8_ppgtt_clear(vm, from, start - from);
>  out:
>  	if (alloc)
>  		free_px(vm, alloc);
> @@ -1484,6 +1508,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
>  
>  		fill_px(pd, vm->scratch[1].encode);
>  		set_pd_entry(pdp, pdpe, pd);
> +		atomic_inc(px_used(pd)); /* keep pinned */
>  	}
>  
>  	return 0;
> @@ -1524,6 +1549,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
>  	}
>  
>  	fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
> +	atomic_inc(px_used(pd)); /* mark as pinned */
>  	return pd;
>  }
>  
> @@ -1573,7 +1599,6 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>  	if (i915_vm_is_4lvl(&ppgtt->vm)) {
>  		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
>  		ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
> -		ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
>  	} else {
>  		if (intel_vgpu_active(i915)) {
>  			err = gen8_preallocate_top_level_pdp(ppgtt);
> @@ -1583,9 +1608,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>  
>  		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
>  		ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
> -		ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl;
>  	}
>  
> +	ppgtt->vm.clear_range = gen8_ppgtt_clear;
> +
>  	if (intel_vgpu_active(i915))
>  		gen8_ppgtt_notify_vgt(ppgtt, true);
>  
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/4] drm/i915/gtt: Recursive cleanup for gen8
  2019-07-12 11:27 ppGTT the recursive wars Chris Wilson
                   ` (3 preceding siblings ...)
  2019-07-12 11:27 ` [PATCH 4/4] drm/i915/gtt: Tidy up ppgtt insertion " Chris Wilson
@ 2019-07-12 15:32 ` Patchwork
  2019-07-12 15:34 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2019-07-12 15:32 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/4] drm/i915/gtt: Recursive cleanup for gen8
URL   : https://patchwork.freedesktop.org/series/63637/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
fa13d0752efc drm/i915/gtt: Recursive cleanup for gen8
66b3e26989e5 drm/i915/gtt: Recursive ppgtt clear for gen8
a94da0fe46bd drm/i915/gtt: Recursive ppgtt alloc for gen8
d383b7fa3711 drm/i915/gtt: Tidy up ppgtt insertion for gen8
-:235: WARNING:LONG_LINE: line over 100 characters
#235: FILE: drivers/gpu/drm/i915/i915_gem_gtt.c:1331:
+							    i915_pd_entry(pml4, gen8_pd_index(idx, 3)),

total: 0 errors, 1 warnings, 0 checks, 334 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [1/4] drm/i915/gtt: Recursive cleanup for gen8
  2019-07-12 11:27 ppGTT the recursive wars Chris Wilson
                   ` (4 preceding siblings ...)
  2019-07-12 15:32 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/4] drm/i915/gtt: Recursive cleanup " Patchwork
@ 2019-07-12 15:34 ` Patchwork
  2019-07-12 15:52 ` ✓ Fi.CI.BAT: success " Patchwork
  2019-07-14  5:07 ` ✓ Fi.CI.IGT: " Patchwork
  7 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2019-07-12 15:34 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/4] drm/i915/gtt: Recursive cleanup for gen8
URL   : https://patchwork.freedesktop.org/series/63637/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915/gtt: Recursive cleanup for gen8
-drivers/gpu/drm/i915/i915_gem_gtt.c:976:9: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem_gtt.c:976:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1511:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1511:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1480:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1480:9: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem_gtt.c:348:14: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem_gtt.c:348:14: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:348:14: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:348:14: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:976:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:976:9: warning: expression using sizeof(void)

Commit: drm/i915/gtt: Recursive ppgtt clear for gen8
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1004:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1004:9: warning: expression using sizeof(void)
+
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:958:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:958:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:976:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:976:9: warning: expression using sizeof(void)
+Error in reading or end of file.

Commit: drm/i915/gtt: Recursive ppgtt alloc for gen8
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1046:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1046:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1095:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1095:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1159:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1159:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1504:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1504:9: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem_gtt.c:354:14: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem_gtt.c:354:14: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:354:14: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:354:14: warning: expression using sizeof(void)

Commit: drm/i915/gtt: Tidy up ppgtt insertion for gen8
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/4] drm/i915/gtt: Recursive cleanup for gen8
  2019-07-12 11:27 ppGTT the recursive wars Chris Wilson
                   ` (5 preceding siblings ...)
  2019-07-12 15:34 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-07-12 15:52 ` Patchwork
  2019-07-14  5:07 ` ✓ Fi.CI.IGT: " Patchwork
  7 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2019-07-12 15:52 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/4] drm/i915/gtt: Recursive cleanup for gen8
URL   : https://patchwork.freedesktop.org/series/63637/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6474 -> Patchwork_13644
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/

Known issues
------------

  Here are the changes found in Patchwork_13644 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_suspend@basic-s3:
    - fi-blb-e6850:       [PASS][1] -> [INCOMPLETE][2] ([fdo#107718])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html

  * igt@i915_module_load@reload-no-display:
    - fi-icl-u3:          [PASS][3] -> [DMESG-WARN][4] ([fdo#107724])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/fi-icl-u3/igt@i915_module_load@reload-no-display.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/fi-icl-u3/igt@i915_module_load@reload-no-display.html

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7567u:       [PASS][5] -> [FAIL][6] ([fdo#109485])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/fi-kbl-7567u/igt@kms_chamelium@hdmi-hpd-fast.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/fi-kbl-7567u/igt@kms_chamelium@hdmi-hpd-fast.html

  
#### Possible fixes ####

  * igt@gem_exec_suspend@basic-s3:
    - fi-apl-guc:         [DMESG-WARN][7] ([fdo#108566]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/fi-apl-guc/igt@gem_exec_suspend@basic-s3.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/fi-apl-guc/igt@gem_exec_suspend@basic-s3.html

  
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#109485]: https://bugs.freedesktop.org/show_bug.cgi?id=109485


Participating hosts (54 -> 44)
------------------------------

  Additional (1): fi-pnv-d510 
  Missing    (11): fi-kbl-soraka fi-icl-u4 fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-gdg-551 fi-icl-y fi-byt-clapper fi-bdw-samus fi-kbl-r 


Build changes
-------------

  * Linux: CI_DRM_6474 -> Patchwork_13644

  CI_DRM_6474: 0140ee44b0e52427496bcc7dddba0f0d7ee15f56 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5095: 549e1cdc064c0491a9c4509f42d826ae0e752a07 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_13644: d383b7fa3711bedd238dfb35330ea3629271ac96 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

d383b7fa3711 drm/i915/gtt: Tidy up ppgtt insertion for gen8
a94da0fe46bd drm/i915/gtt: Recursive ppgtt alloc for gen8
66b3e26989e5 drm/i915/gtt: Recursive ppgtt clear for gen8
fa13d0752efc drm/i915/gtt: Recursive cleanup for gen8

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✓ Fi.CI.IGT: success for series starting with [1/4] drm/i915/gtt: Recursive cleanup for gen8
  2019-07-12 11:27 ppGTT the recursive wars Chris Wilson
                   ` (6 preceding siblings ...)
  2019-07-12 15:52 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-07-14  5:07 ` Patchwork
  7 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2019-07-14  5:07 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/4] drm/i915/gtt: Recursive cleanup for gen8
URL   : https://patchwork.freedesktop.org/series/63637/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6474_full -> Patchwork_13644_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Known issues
------------

  Here are the changes found in Patchwork_13644_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_isolation@rcs0-s3:
    - shard-apl:          [PASS][1] -> [DMESG-WARN][2] ([fdo#108566]) +3 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-apl1/igt@gem_ctx_isolation@rcs0-s3.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-apl3/igt@gem_ctx_isolation@rcs0-s3.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible:
    - shard-skl:          [PASS][3] -> [FAIL][4] ([fdo#105363])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-skl7/igt@kms_flip@flip-vs-expired-vblank-interruptible.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-skl9/igt@kms_flip@flip-vs-expired-vblank-interruptible.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-draw-pwrite:
    - shard-iclb:         [PASS][5] -> [FAIL][6] ([fdo#103167]) +4 similar issues
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-iclb4/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-draw-pwrite.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-iclb2/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-draw-pwrite.html

  * igt@kms_psr@psr2_cursor_mmap_cpu:
    - shard-iclb:         [PASS][7] -> [SKIP][8] ([fdo#109441]) +2 similar issues
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-iclb2/igt@kms_psr@psr2_cursor_mmap_cpu.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-iclb6/igt@kms_psr@psr2_cursor_mmap_cpu.html

  * igt@kms_setmode@basic:
    - shard-apl:          [PASS][9] -> [FAIL][10] ([fdo#99912])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-apl1/igt@kms_setmode@basic.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-apl6/igt@kms_setmode@basic.html

  * igt@perf@polling:
    - shard-skl:          [PASS][11] -> [FAIL][12] ([fdo#110728])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-skl8/igt@perf@polling.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-skl1/igt@perf@polling.html

  
#### Possible fixes ####

  * igt@gem_exec_parallel@basic:
    - shard-snb:          [INCOMPLETE][13] ([fdo#105411]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-snb6/igt@gem_exec_parallel@basic.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-snb5/igt@gem_exec_parallel@basic.html

  * igt@i915_pm_rc6_residency@rc6-accuracy:
    - shard-kbl:          [SKIP][15] ([fdo#109271]) -> [PASS][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-kbl2/igt@i915_pm_rc6_residency@rc6-accuracy.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-kbl3/igt@i915_pm_rc6_residency@rc6-accuracy.html

  * igt@kms_flip@2x-modeset-vs-vblank-race-interruptible:
    - shard-glk:          [FAIL][17] ([fdo#103060]) -> [PASS][18]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-glk3/igt@kms_flip@2x-modeset-vs-vblank-race-interruptible.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-glk9/igt@kms_flip@2x-modeset-vs-vblank-race-interruptible.html

  * igt@kms_flip@flip-vs-expired-vblank:
    - shard-skl:          [FAIL][19] ([fdo#105363]) -> [PASS][20]
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-skl6/igt@kms_flip@flip-vs-expired-vblank.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-skl5/igt@kms_flip@flip-vs-expired-vblank.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-hsw:          [INCOMPLETE][21] ([fdo#103540]) -> [PASS][22]
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-hsw8/igt@kms_flip@flip-vs-suspend-interruptible.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-hsw1/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@kms_frontbuffer_tracking@fbc-1p-pri-indfb-multidraw:
    - shard-iclb:         [FAIL][23] ([fdo#103167]) -> [PASS][24] +1 similar issue
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-iclb2/igt@kms_frontbuffer_tracking@fbc-1p-pri-indfb-multidraw.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-iclb4/igt@kms_frontbuffer_tracking@fbc-1p-pri-indfb-multidraw.html

  * igt@kms_frontbuffer_tracking@fbc-1p-rte:
    - shard-iclb:         [FAIL][25] ([fdo#103167] / [fdo#110378]) -> [PASS][26]
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-iclb5/igt@kms_frontbuffer_tracking@fbc-1p-rte.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-iclb7/igt@kms_frontbuffer_tracking@fbc-1p-rte.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
    - shard-skl:          [INCOMPLETE][27] ([fdo#104108]) -> [PASS][28]
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-skl7/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-skl6/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes:
    - shard-apl:          [DMESG-WARN][29] ([fdo#108566]) -> [PASS][30] +2 similar issues
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-apl6/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-apl5/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes.html

  * igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min:
    - shard-skl:          [FAIL][31] ([fdo#108145]) -> [PASS][32]
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-skl4/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-skl7/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html

  * igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
    - shard-skl:          [FAIL][33] ([fdo#108145] / [fdo#110403]) -> [PASS][34]
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-skl5/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-skl6/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html

  * igt@kms_plane_cursor@pipe-a-primary-size-256:
    - shard-iclb:         [INCOMPLETE][35] ([fdo#107713]) -> [PASS][36]
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-iclb1/igt@kms_plane_cursor@pipe-a-primary-size-256.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-iclb5/igt@kms_plane_cursor@pipe-a-primary-size-256.html

  * igt@kms_plane_lowres@pipe-a-tiling-x:
    - shard-iclb:         [FAIL][37] ([fdo#103166]) -> [PASS][38]
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-iclb4/igt@kms_plane_lowres@pipe-a-tiling-x.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-iclb2/igt@kms_plane_lowres@pipe-a-tiling-x.html

  * igt@kms_psr@psr2_cursor_blt:
    - shard-iclb:         [SKIP][39] ([fdo#109441]) -> [PASS][40] +1 similar issue
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6474/shard-iclb8/igt@kms_psr@psr2_cursor_blt.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/shard-iclb2/igt@kms_psr@psr2_cursor_blt.html

  
  [fdo#103060]: https://bugs.freedesktop.org/show_bug.cgi?id=103060
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#105411]: https://bugs.freedesktop.org/show_bug.cgi?id=105411
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#110378]: https://bugs.freedesktop.org/show_bug.cgi?id=110378
  [fdo#110403]: https://bugs.freedesktop.org/show_bug.cgi?id=110403
  [fdo#110728]: https://bugs.freedesktop.org/show_bug.cgi?id=110728
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * Linux: CI_DRM_6474 -> Patchwork_13644

  CI_DRM_6474: 0140ee44b0e52427496bcc7dddba0f0d7ee15f56 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5095: 549e1cdc064c0491a9c4509f42d826ae0e752a07 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_13644: d383b7fa3711bedd238dfb35330ea3629271ac96 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13644/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] drm/i915/gtt: Recursive ppgtt alloc for gen8
  2019-07-12 11:27 ` [PATCH 3/4] drm/i915/gtt: Recursive ppgtt alloc " Chris Wilson
@ 2019-07-16 15:28   ` Abdiel Janulgue
  2019-07-16 15:28   ` Abdiel Janulgue
  1 sibling, 0 replies; 15+ messages in thread
From: Abdiel Janulgue @ 2019-07-16 15:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx



On 12/07/2019 14.27, Chris Wilson wrote:
> Refactor the separate allocation routines into a single recursive
> function.
> 

Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 272 ++++++++++------------------
>  1 file changed, 97 insertions(+), 175 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 7b2f3188d435..72e0f9799a46 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1007,199 +1007,119 @@ static void gen8_ppgtt_clear(struct i915_address_space *vm,
>  			   start, start + length, vm->top);
>  }
>  
> -static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
> -				struct i915_page_directory *pd,
> -				u64 start, u64 length)
> -{
> -	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
> -	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
> -
> -	start >>= GEN8_PTE_SHIFT;
> -	length >>= GEN8_PTE_SHIFT;
> -
> -	__gen8_ppgtt_clear(vm, pd, start, start + length, 1);
> -}
> -
> -static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
> -				 struct i915_page_directory * const pdp,
> -				 u64 start, u64 length)
> -{
> -	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
> -	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
> -
> -	start >>= GEN8_PTE_SHIFT;
> -	length >>= GEN8_PTE_SHIFT;
> -
> -	__gen8_ppgtt_clear(vm, pdp, start, start + length, 2);
> -}
> -
> -static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
> -			       struct i915_page_directory *pd,
> -			       u64 start, u64 length)
> +static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
> +			      struct i915_page_directory * const pd,
> +			      u64 * const start, u64 end, int lvl)
>  {
> -	struct i915_page_table *pt, *alloc = NULL;
> -	u64 from = start;
> -	unsigned int pde;
> +	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
> +	struct i915_page_table *alloc = NULL;
> +	unsigned int idx, len;
>  	int ret = 0;
>  
> +	len = gen8_pd_range(*start, end, lvl--, &idx);
> +	DBG("%s(%p):{lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n",
> +	    __func__, vm, lvl + 1, *start, end,
> +	    idx, len, atomic_read(px_used(pd)));
> +	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
> +
>  	spin_lock(&pd->lock);
> -	gen8_for_each_pde(pt, pd, start, length, pde) {
> -		const int count = gen8_pte_count(start, length);
> +	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
> +	do {
> +		struct i915_page_table *pt = pd->entry[idx];
>  
>  		if (!pt) {
>  			spin_unlock(&pd->lock);
>  
> -			pt = fetch_and_zero(&alloc);
> -			if (!pt)
> -				pt = alloc_pt(vm);
> -			if (IS_ERR(pt)) {
> -				ret = PTR_ERR(pt);
> -				goto unwind;
> -			}
> +			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
> +			    __func__, vm, lvl + 1, idx);
>  
> -			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
> -				fill_px(pt, vm->scratch[0].encode);
> +			pt = fetch_and_zero(&alloc);
> +			if (lvl) {
> +				if (!pt) {
> +					pt = &alloc_pd(vm)->pt;
> +					if (IS_ERR(pt)) {
> +						ret = PTR_ERR(pt);
> +						goto out;
> +					}
> +				}
>  
> -			spin_lock(&pd->lock);
> -			if (!pd->entry[pde]) {
> -				set_pd_entry(pd, pde, pt);
> +				fill_px(pt, vm->scratch[lvl].encode);
>  			} else {
> -				alloc = pt;
> -				pt = pd->entry[pde];
> -			}
> -		}
> -
> -		atomic_add(count, &pt->used);
> -	}
> -	spin_unlock(&pd->lock);
> -	goto out;
> -
> -unwind:
> -	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
> -out:
> -	if (alloc)
> -		free_px(vm, alloc);
> -	return ret;
> -}
> -
> -static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
> -				struct i915_page_directory *pdp,
> -				u64 start, u64 length)
> -{
> -	struct i915_page_directory *pd, *alloc = NULL;
> -	u64 from = start;
> -	unsigned int pdpe;
> -	int ret = 0;
> +				if (!pt) {
> +					pt = alloc_pt(vm);
> +					if (IS_ERR(pt)) {
> +						ret = PTR_ERR(pt);
> +						goto out;
> +					}
> +				}
>  
> -	spin_lock(&pdp->lock);
> -	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		if (!pd) {
> -			spin_unlock(&pdp->lock);
> -
> -			pd = fetch_and_zero(&alloc);
> -			if (!pd)
> -				pd = alloc_pd(vm);
> -			if (IS_ERR(pd)) {
> -				ret = PTR_ERR(pd);
> -				goto unwind;
> +				if (intel_vgpu_active(vm->i915) ||
> +				    gen8_pt_count(*start, end) < I915_PDES)
> +					fill_px(pt, vm->scratch[lvl].encode);
>  			}
>  
> -			fill_px(pd, vm->scratch[1].encode);
> +			spin_lock(&pd->lock);
> +			if (likely(!pd->entry[idx]))
> +				set_pd_entry(pd, idx, pt);
> +			else
> +				alloc = pt, pt = pd->entry[idx];
> +		}
>  
> -			spin_lock(&pdp->lock);
> -			if (!pdp->entry[pdpe]) {
> -				set_pd_entry(pdp, pdpe, pd);
> -			} else {
> -				alloc = pd;
> -				pd = pdp->entry[pdpe];
> +		if (lvl) {
> +			atomic_inc(&pt->used);
> +			spin_unlock(&pd->lock);
> +
> +			ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
> +						 start, end, lvl);
> +			if (unlikely(ret)) {
> +				if (release_pd_entry(pd, idx, pt, scratch))
> +					free_px(vm, pt);
> +				goto out;
>  			}
> -		}
> -		atomic_inc(px_used(pd));
> -		spin_unlock(&pdp->lock);
>  
> -		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
> -		if (unlikely(ret))
> -			goto unwind_pd;
> +			spin_lock(&pd->lock);
> +			atomic_dec(&pt->used);
> +			GEM_BUG_ON(!atomic_read(&pt->used));
> +		} else {
> +			unsigned int count = gen8_pt_count(*start, end);
>  
> -		spin_lock(&pdp->lock);
> -		atomic_dec(px_used(pd));
> -	}
> -	spin_unlock(&pdp->lock);
> -	goto out;
> +			DBG("%s(%p):{lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d} inserting pte\n",
> +			    __func__, vm, lvl, *start, end,
> +			    gen8_pd_index(*start, 0), count,
> +			    atomic_read(&pt->used));
>  
> -unwind_pd:
> -	if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
> -		free_px(vm, pd);
> -unwind:
> -	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
> +			atomic_add(count, &pt->used);
> +			GEM_BUG_ON(atomic_read(&pt->used) > I915_PDES);
> +			*start += count;
> +		}
> +	} while (idx++, --len);
> +	spin_unlock(&pd->lock);
>  out:
>  	if (alloc)
>  		free_px(vm, alloc);
>  	return ret;
>  }
>  
> -static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
> -				 u64 start, u64 length)
> -{
> -	return gen8_ppgtt_alloc_pdp(vm,
> -				    i915_vm_to_ppgtt(vm)->pd, start, length);
> -}
> -
> -static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
> -				 u64 start, u64 length)
> +static int gen8_ppgtt_alloc(struct i915_address_space *vm,
> +			    u64 start, u64 length)
>  {
> -	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> -	struct i915_page_directory * const pml4 = ppgtt->pd;
> -	struct i915_page_directory *pdp, *alloc = NULL;
>  	u64 from = start;
> -	int ret = 0;
> -	u32 pml4e;
> -
> -	spin_lock(&pml4->lock);
> -	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -		if (!pdp) {
> -			spin_unlock(&pml4->lock);
> -
> -			pdp = fetch_and_zero(&alloc);
> -			if (!pdp)
> -				pdp = alloc_pd(vm);
> -			if (IS_ERR(pdp)) {
> -				ret = PTR_ERR(pdp);
> -				goto unwind;
> -			}
> -
> -			fill_px(pdp, vm->scratch[2].encode);
> +	int err;
>  
> -			spin_lock(&pml4->lock);
> -			if (!pml4->entry[pml4e]) {
> -				set_pd_entry(pml4, pml4e, pdp);
> -			} else {
> -				alloc = pdp;
> -				pdp = pml4->entry[pml4e];
> -			}
> -		}
> -		atomic_inc(px_used(pdp));
> -		spin_unlock(&pml4->lock);
> +	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
> +	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
>  
> -		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
> -		if (unlikely(ret))
> -			goto unwind_pdp;
> +	start >>= GEN8_PTE_SHIFT;
> +	length >>= GEN8_PTE_SHIFT;
> +	GEM_BUG_ON(length == 0);
>  
> -		spin_lock(&pml4->lock);
> -		atomic_dec(px_used(pdp));
> -	}
> -	spin_unlock(&pml4->lock);
> -	goto out;
> +	err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
> +				 &start, start + length, vm->top);
> +	if (unlikely(err))
> +		__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
> +				   from, start, vm->top);
>  
> -unwind_pdp:
> -	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
> -		free_px(vm, pdp);
> -unwind:
> -	gen8_ppgtt_clear(vm, from, start - from);
> -out:
> -	if (alloc)
> -		free_px(vm, alloc);
> -	return ret;
> +	return err;
>  }
>  
>  static inline struct sgt_dma {
> @@ -1496,19 +1416,22 @@ static int gen8_init_scratch(struct i915_address_space *vm)
>  static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
>  {
>  	struct i915_address_space *vm = &ppgtt->vm;
> -	struct i915_page_directory *pdp = ppgtt->pd;
> -	struct i915_page_directory *pd;
> -	u64 start = 0, length = ppgtt->vm.total;
> -	unsigned int pdpe;
> +	struct i915_page_directory *pd = ppgtt->pd;
> +	unsigned int idx;
> +
> +	GEM_BUG_ON(vm->top != 2);
> +	GEM_BUG_ON((vm->total >> __gen8_pte_shift(2)) != GEN8_3LVL_PDPES);
> +
> +	for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
> +		struct i915_page_directory *pde;
>  
> -	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		pd = alloc_pd(vm);
> -		if (IS_ERR(pd))
> -			return PTR_ERR(pd);
> +		pde = alloc_pd(vm);
> +		if (IS_ERR(pde))
> +			return PTR_ERR(pde);
>  
> -		fill_px(pd, vm->scratch[1].encode);
> -		set_pd_entry(pdp, pdpe, pd);
> -		atomic_inc(px_used(pd)); /* keep pinned */
> +		fill_px(pde, vm->scratch[1].encode);
> +		set_pd_entry(pd, idx, pde);
> +		atomic_inc(px_used(pde)); /* keep pinned */
>  	}
>  
>  	return 0;
> @@ -1597,7 +1520,6 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>  	}
>  
>  	if (i915_vm_is_4lvl(&ppgtt->vm)) {
> -		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
>  		ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
>  	} else {
>  		if (intel_vgpu_active(i915)) {
> @@ -1606,10 +1528,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>  				goto err_free_pd;
>  		}
>  
> -		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
>  		ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
>  	}
>  
> +	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
>  	ppgtt->vm.clear_range = gen8_ppgtt_clear;
>  
>  	if (intel_vgpu_active(i915))
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] drm/i915/gtt: Recursive ppgtt alloc for gen8
  2019-07-12 11:27 ` [PATCH 3/4] drm/i915/gtt: Recursive ppgtt alloc " Chris Wilson
  2019-07-16 15:28   ` Abdiel Janulgue
@ 2019-07-16 15:28   ` Abdiel Janulgue
  1 sibling, 0 replies; 15+ messages in thread
From: Abdiel Janulgue @ 2019-07-16 15:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx



On 12/07/2019 14.27, Chris Wilson wrote:
> Refactor the separate allocation routines into a single recursive
> function.
> 
Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 272 ++++++++++------------------
>  1 file changed, 97 insertions(+), 175 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 7b2f3188d435..72e0f9799a46 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1007,199 +1007,119 @@ static void gen8_ppgtt_clear(struct i915_address_space *vm,
>  			   start, start + length, vm->top);
>  }
>  
> -static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
> -				struct i915_page_directory *pd,
> -				u64 start, u64 length)
> -{
> -	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
> -	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
> -
> -	start >>= GEN8_PTE_SHIFT;
> -	length >>= GEN8_PTE_SHIFT;
> -
> -	__gen8_ppgtt_clear(vm, pd, start, start + length, 1);
> -}
> -
> -static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
> -				 struct i915_page_directory * const pdp,
> -				 u64 start, u64 length)
> -{
> -	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
> -	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
> -
> -	start >>= GEN8_PTE_SHIFT;
> -	length >>= GEN8_PTE_SHIFT;
> -
> -	__gen8_ppgtt_clear(vm, pdp, start, start + length, 2);
> -}
> -
> -static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
> -			       struct i915_page_directory *pd,
> -			       u64 start, u64 length)
> +static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
> +			      struct i915_page_directory * const pd,
> +			      u64 * const start, u64 end, int lvl)
>  {
> -	struct i915_page_table *pt, *alloc = NULL;
> -	u64 from = start;
> -	unsigned int pde;
> +	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
> +	struct i915_page_table *alloc = NULL;
> +	unsigned int idx, len;
>  	int ret = 0;
>  
> +	len = gen8_pd_range(*start, end, lvl--, &idx);
> +	DBG("%s(%p):{lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n",
> +	    __func__, vm, lvl + 1, *start, end,
> +	    idx, len, atomic_read(px_used(pd)));
> +	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
> +
>  	spin_lock(&pd->lock);
> -	gen8_for_each_pde(pt, pd, start, length, pde) {
> -		const int count = gen8_pte_count(start, length);
> +	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
> +	do {
> +		struct i915_page_table *pt = pd->entry[idx];
>  
>  		if (!pt) {
>  			spin_unlock(&pd->lock);
>  
> -			pt = fetch_and_zero(&alloc);
> -			if (!pt)
> -				pt = alloc_pt(vm);
> -			if (IS_ERR(pt)) {
> -				ret = PTR_ERR(pt);
> -				goto unwind;
> -			}
> +			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
> +			    __func__, vm, lvl + 1, idx);
>  
> -			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
> -				fill_px(pt, vm->scratch[0].encode);
> +			pt = fetch_and_zero(&alloc);
> +			if (lvl) {
> +				if (!pt) {
> +					pt = &alloc_pd(vm)->pt;
> +					if (IS_ERR(pt)) {
> +						ret = PTR_ERR(pt);
> +						goto out;
> +					}
> +				}
>  
> -			spin_lock(&pd->lock);
> -			if (!pd->entry[pde]) {
> -				set_pd_entry(pd, pde, pt);
> +				fill_px(pt, vm->scratch[lvl].encode);
>  			} else {
> -				alloc = pt;
> -				pt = pd->entry[pde];
> -			}
> -		}
> -
> -		atomic_add(count, &pt->used);
> -	}
> -	spin_unlock(&pd->lock);
> -	goto out;
> -
> -unwind:
> -	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
> -out:
> -	if (alloc)
> -		free_px(vm, alloc);
> -	return ret;
> -}
> -
> -static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
> -				struct i915_page_directory *pdp,
> -				u64 start, u64 length)
> -{
> -	struct i915_page_directory *pd, *alloc = NULL;
> -	u64 from = start;
> -	unsigned int pdpe;
> -	int ret = 0;
> +				if (!pt) {
> +					pt = alloc_pt(vm);
> +					if (IS_ERR(pt)) {
> +						ret = PTR_ERR(pt);
> +						goto out;
> +					}
> +				}
>  
> -	spin_lock(&pdp->lock);
> -	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		if (!pd) {
> -			spin_unlock(&pdp->lock);
> -
> -			pd = fetch_and_zero(&alloc);
> -			if (!pd)
> -				pd = alloc_pd(vm);
> -			if (IS_ERR(pd)) {
> -				ret = PTR_ERR(pd);
> -				goto unwind;
> +				if (intel_vgpu_active(vm->i915) ||
> +				    gen8_pt_count(*start, end) < I915_PDES)
> +					fill_px(pt, vm->scratch[lvl].encode);
>  			}
>  
> -			fill_px(pd, vm->scratch[1].encode);
> +			spin_lock(&pd->lock);
> +			if (likely(!pd->entry[idx]))
> +				set_pd_entry(pd, idx, pt);
> +			else
> +				alloc = pt, pt = pd->entry[idx];
> +		}
>  
> -			spin_lock(&pdp->lock);
> -			if (!pdp->entry[pdpe]) {
> -				set_pd_entry(pdp, pdpe, pd);
> -			} else {
> -				alloc = pd;
> -				pd = pdp->entry[pdpe];
> +		if (lvl) {
> +			atomic_inc(&pt->used);
> +			spin_unlock(&pd->lock);
> +
> +			ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
> +						 start, end, lvl);
> +			if (unlikely(ret)) {
> +				if (release_pd_entry(pd, idx, pt, scratch))
> +					free_px(vm, pt);
> +				goto out;
>  			}
> -		}
> -		atomic_inc(px_used(pd));
> -		spin_unlock(&pdp->lock);
>  
> -		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
> -		if (unlikely(ret))
> -			goto unwind_pd;
> +			spin_lock(&pd->lock);
> +			atomic_dec(&pt->used);
> +			GEM_BUG_ON(!atomic_read(&pt->used));
> +		} else {
> +			unsigned int count = gen8_pt_count(*start, end);
>  
> -		spin_lock(&pdp->lock);
> -		atomic_dec(px_used(pd));
> -	}
> -	spin_unlock(&pdp->lock);
> -	goto out;
> +			DBG("%s(%p):{lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d} inserting pte\n",
> +			    __func__, vm, lvl, *start, end,
> +			    gen8_pd_index(*start, 0), count,
> +			    atomic_read(&pt->used));
>  
> -unwind_pd:
> -	if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
> -		free_px(vm, pd);
> -unwind:
> -	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
> +			atomic_add(count, &pt->used);
> +			GEM_BUG_ON(atomic_read(&pt->used) > I915_PDES);
> +			*start += count;
> +		}
> +	} while (idx++, --len);
> +	spin_unlock(&pd->lock);
>  out:
>  	if (alloc)
>  		free_px(vm, alloc);
>  	return ret;
>  }
>  
> -static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
> -				 u64 start, u64 length)
> -{
> -	return gen8_ppgtt_alloc_pdp(vm,
> -				    i915_vm_to_ppgtt(vm)->pd, start, length);
> -}
> -
> -static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
> -				 u64 start, u64 length)
> +static int gen8_ppgtt_alloc(struct i915_address_space *vm,
> +			    u64 start, u64 length)
>  {
> -	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> -	struct i915_page_directory * const pml4 = ppgtt->pd;
> -	struct i915_page_directory *pdp, *alloc = NULL;
>  	u64 from = start;
> -	int ret = 0;
> -	u32 pml4e;
> -
> -	spin_lock(&pml4->lock);
> -	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -		if (!pdp) {
> -			spin_unlock(&pml4->lock);
> -
> -			pdp = fetch_and_zero(&alloc);
> -			if (!pdp)
> -				pdp = alloc_pd(vm);
> -			if (IS_ERR(pdp)) {
> -				ret = PTR_ERR(pdp);
> -				goto unwind;
> -			}
> -
> -			fill_px(pdp, vm->scratch[2].encode);
> +	int err;
>  
> -			spin_lock(&pml4->lock);
> -			if (!pml4->entry[pml4e]) {
> -				set_pd_entry(pml4, pml4e, pdp);
> -			} else {
> -				alloc = pdp;
> -				pdp = pml4->entry[pml4e];
> -			}
> -		}
> -		atomic_inc(px_used(pdp));
> -		spin_unlock(&pml4->lock);
> +	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
> +	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
>  
> -		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
> -		if (unlikely(ret))
> -			goto unwind_pdp;
> +	start >>= GEN8_PTE_SHIFT;
> +	length >>= GEN8_PTE_SHIFT;
> +	GEM_BUG_ON(length == 0);
>  
> -		spin_lock(&pml4->lock);
> -		atomic_dec(px_used(pdp));
> -	}
> -	spin_unlock(&pml4->lock);
> -	goto out;
> +	err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
> +				 &start, start + length, vm->top);
> +	if (unlikely(err))
> +		__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
> +				   from, start, vm->top);
>  
> -unwind_pdp:
> -	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
> -		free_px(vm, pdp);
> -unwind:
> -	gen8_ppgtt_clear(vm, from, start - from);
> -out:
> -	if (alloc)
> -		free_px(vm, alloc);
> -	return ret;
> +	return err;
>  }
>  
>  static inline struct sgt_dma {
> @@ -1496,19 +1416,22 @@ static int gen8_init_scratch(struct i915_address_space *vm)
>  static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
>  {
>  	struct i915_address_space *vm = &ppgtt->vm;
> -	struct i915_page_directory *pdp = ppgtt->pd;
> -	struct i915_page_directory *pd;
> -	u64 start = 0, length = ppgtt->vm.total;
> -	unsigned int pdpe;
> +	struct i915_page_directory *pd = ppgtt->pd;
> +	unsigned int idx;
> +
> +	GEM_BUG_ON(vm->top != 2);
> +	GEM_BUG_ON((vm->total >> __gen8_pte_shift(2)) != GEN8_3LVL_PDPES);
> +
> +	for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
> +		struct i915_page_directory *pde;
>  
> -	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		pd = alloc_pd(vm);
> -		if (IS_ERR(pd))
> -			return PTR_ERR(pd);
> +		pde = alloc_pd(vm);
> +		if (IS_ERR(pde))
> +			return PTR_ERR(pde);
>  
> -		fill_px(pd, vm->scratch[1].encode);
> -		set_pd_entry(pdp, pdpe, pd);
> -		atomic_inc(px_used(pd)); /* keep pinned */
> +		fill_px(pde, vm->scratch[1].encode);
> +		set_pd_entry(pd, idx, pde);
> +		atomic_inc(px_used(pde)); /* keep pinned */
>  	}
>  
>  	return 0;
> @@ -1597,7 +1520,6 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>  	}
>  
>  	if (i915_vm_is_4lvl(&ppgtt->vm)) {
> -		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
>  		ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
>  	} else {
>  		if (intel_vgpu_active(i915)) {
> @@ -1606,10 +1528,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>  				goto err_free_pd;
>  		}
>  
> -		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
>  		ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
>  	}
>  
> +	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
>  	ppgtt->vm.clear_range = gen8_ppgtt_clear;
>  
>  	if (intel_vgpu_active(i915))
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] drm/i915/gtt: Tidy up ppgtt insertion for gen8
  2019-07-12 11:27 ` [PATCH 4/4] drm/i915/gtt: Tidy up ppgtt insertion " Chris Wilson
@ 2019-07-16 15:30   ` Abdiel Janulgue
  2019-07-16 15:45     ` Chris Wilson
  0 siblings, 1 reply; 15+ messages in thread
From: Abdiel Janulgue @ 2019-07-16 15:30 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx



On 12/07/2019 14.27, Chris Wilson wrote:
> Apply the new radix shift helpers to extract the multi-level indices
> cleanly when inserting pte into the gtt tree.
> 
Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++-----------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  90 ++--------------------
>  2 files changed, 48 insertions(+), 157 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 72e0f9799a46..de78dc8c425c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1131,47 +1131,28 @@ static inline struct sgt_dma {
>  	return (struct sgt_dma) { sg, addr, addr + sg->length };
>  }
>  
> -struct gen8_insert_pte {
> -	u16 pml4e;
> -	u16 pdpe;
> -	u16 pde;
> -	u16 pte;
> -};
> -
> -static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
> -{
> -	return (struct gen8_insert_pte) {
> -		 gen8_pml4e_index(start),
> -		 gen8_pdpe_index(start),
> -		 gen8_pde_index(start),
> -		 gen8_pte_index(start),
> -	};
> -}
> -
> -static __always_inline bool
> +static __always_inline u64
>  gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
>  			      struct i915_page_directory *pdp,
>  			      struct sgt_dma *iter,
> -			      struct gen8_insert_pte *idx,
> +			      u64 idx,
>  			      enum i915_cache_level cache_level,
>  			      u32 flags)
>  {
>  	struct i915_page_directory *pd;
>  	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
>  	gen8_pte_t *vaddr;
> -	bool ret;
>  
> -	GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
> -	pd = i915_pd_entry(pdp, idx->pdpe);
> -	vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
> +	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
> +	vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
>  	do {
> -		vaddr[idx->pte] = pte_encode | iter->dma;
> +		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
>  
>  		iter->dma += I915_GTT_PAGE_SIZE;
>  		if (iter->dma >= iter->max) {
>  			iter->sg = __sg_next(iter->sg);
>  			if (!iter->sg) {
> -				ret = false;
> +				idx = 0;
>  				break;
>  			}
>  
> @@ -1179,30 +1160,22 @@ gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
>  			iter->max = iter->dma + iter->sg->length;
>  		}
>  
> -		if (++idx->pte == GEN8_PTES) {
> -			idx->pte = 0;
> -
> -			if (++idx->pde == I915_PDES) {
> -				idx->pde = 0;
> -
> +		if (gen8_pd_index(++idx, 0) == 0) {
> +			if (gen8_pd_index(idx, 1) == 0) {
>  				/* Limited by sg length for 3lvl */
> -				if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
> -					idx->pdpe = 0;
> -					ret = true;
> +				if (gen8_pd_index(idx, 2) == 0)
>  					break;
> -				}
>  
> -				GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
> -				pd = pdp->entry[idx->pdpe];
> +				pd = pdp->entry[gen8_pd_index(idx, 2)];
>  			}
>  
>  			kunmap_atomic(vaddr);
> -			vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
> +			vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
>  		}
>  	} while (1);
>  	kunmap_atomic(vaddr);
>  
> -	return ret;
> +	return idx;
>  }
>  
>  static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
> @@ -1212,9 +1185,9 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
>  {
>  	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	struct sgt_dma iter = sgt_dma(vma);
> -	struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
>  
> -	gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter, &idx,
> +	gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter,
> +				      vma->node.start >> GEN8_PTE_SHIFT,
>  				      cache_level, flags);
>  
>  	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
> @@ -1231,39 +1204,38 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
>  	dma_addr_t rem = iter->sg->length;
>  
>  	do {
> -		struct gen8_insert_pte idx = gen8_insert_pte(start);
>  		struct i915_page_directory *pdp =
> -			i915_pdp_entry(pml4, idx.pml4e);
> -		struct i915_page_directory *pd = i915_pd_entry(pdp, idx.pdpe);
> -		unsigned int page_size;
> -		bool maybe_64K = false;
> +			i915_pd_entry(pml4, __gen8_pte_index(start, 3));
> +		struct i915_page_directory *pd =
> +			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
>  		gen8_pte_t encode = pte_encode;
> +		unsigned int maybe_64K = -1;
> +		unsigned int page_size;
>  		gen8_pte_t *vaddr;
> -		u16 index, max;
> +		u16 index;
>  
>  		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
>  		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
> -		    rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
> -			index = idx.pde;
> -			max = I915_PDES;
> -			page_size = I915_GTT_PAGE_SIZE_2M;
> -
> +		    rem >= I915_GTT_PAGE_SIZE_2M &&
> +		    !__gen8_pte_index(start, 0)) {
> +			index = __gen8_pte_index(start, 1);
>  			encode |= GEN8_PDE_PS_2M;
> +			page_size = I915_GTT_PAGE_SIZE_2M;
>  
>  			vaddr = kmap_atomic_px(pd);
>  		} else {
> -			struct i915_page_table *pt = i915_pt_entry(pd, idx.pde);
> +			struct i915_page_table *pt =
> +				i915_pt_entry(pd, __gen8_pte_index(start, 1));
>  
> -			index = idx.pte;
> -			max = GEN8_PTES;
> +			index = __gen8_pte_index(start, 0);
>  			page_size = I915_GTT_PAGE_SIZE;
>  
>  			if (!index &&
>  			    vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
>  			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
>  			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
> -			     rem >= (max - index) * I915_GTT_PAGE_SIZE))
> -				maybe_64K = true;
> +			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
> +				maybe_64K = __gen8_pte_index(start, 1);
>  
>  			vaddr = kmap_atomic_px(pt);
>  		}
> @@ -1284,16 +1256,16 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
>  				iter->dma = sg_dma_address(iter->sg);
>  				iter->max = iter->dma + rem;
>  
> -				if (maybe_64K && index < max &&
> +				if (maybe_64K != -1 && index < I915_PDES &&
>  				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
>  				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
> -				       rem >= (max - index) * I915_GTT_PAGE_SIZE)))
> -					maybe_64K = false;
> +				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
> +					maybe_64K = -1;
>  
>  				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
>  					break;
>  			}
> -		} while (rem >= page_size && index < max);
> +		} while (rem >= page_size && index < I915_PDES);
>  
>  		kunmap_atomic(vaddr);
>  
> @@ -1303,14 +1275,14 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
>  		 * it and have reached the end of the sg table and we have
>  		 * enough padding.
>  		 */
> -		if (maybe_64K &&
> -		    (index == max ||
> +		if (maybe_64K != -1 &&
> +		    (index == I915_PDES ||
>  		     (i915_vm_has_scratch_64K(vma->vm) &&
>  		      !iter->sg && IS_ALIGNED(vma->node.start +
>  					      vma->node.size,
>  					      I915_GTT_PAGE_SIZE_2M)))) {
>  			vaddr = kmap_atomic_px(pd);
> -			vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
> +			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
>  			kunmap_atomic(vaddr);
>  			page_size = I915_GTT_PAGE_SIZE_64K;
>  
> @@ -1327,8 +1299,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
>  				u16 i;
>  
>  				encode = vma->vm->scratch[0].encode;
> -				vaddr = kmap_atomic_px(i915_pt_entry(pd,
> -								     idx.pde));
> +				vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
>  
>  				for (i = 1; i < index; i += 16)
>  					memset64(vaddr + i, encode, 15);
> @@ -1354,13 +1325,13 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
>  		gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level,
>  					       flags);
>  	} else {
> -		struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
> +		u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
>  
> -		while (gen8_ppgtt_insert_pte_entries(ppgtt,
> -						     i915_pdp_entry(pml4, idx.pml4e++),
> -						     &iter, &idx, cache_level,
> -						     flags))
> -			GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
> +		while ((idx = gen8_ppgtt_insert_pte_entries(ppgtt,
> +							    i915_pd_entry(pml4, gen8_pd_index(idx, 3)),
> +							    &iter, idx, cache_level,
> +							    flags)))
> +			;
>  
>  		vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index aea9a7084414..6d6e1f4015b9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -115,29 +115,18 @@ typedef u64 gen8_pte_t;
>  #define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
>  #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
>  
> -/* GEN8 32b style address is defined as a 3 level page table:
> +/*
> + * GEN8 32b style address is defined as a 3 level page table:
>   * 31:30 | 29:21 | 20:12 |  11:0
>   * PDPE  |  PDE  |  PTE  | offset
>   * The difference as compared to normal x86 3 level page table is the PDPEs are
>   * programmed via register.
> - */
> -#define GEN8_3LVL_PDPES			4
> -#define GEN8_PDE_SHIFT			21
> -#define GEN8_PDE_MASK			0x1ff
> -#define GEN8_PTE_MASK			0x1ff
> -#define GEN8_PTES			I915_PTES(sizeof(gen8_pte_t))
> -
> -/* GEN8 48b style address is defined as a 4 level page table:
> + *
> + * GEN8 48b style address is defined as a 4 level page table:
>   * 47:39 | 38:30 | 29:21 | 20:12 |  11:0
>   * PML4E | PDPE  |  PDE  |  PTE  | offset
>   */
> -#define GEN8_PML4ES_PER_PML4		512
> -#define GEN8_PML4E_SHIFT		39
> -#define GEN8_PML4E_MASK			(GEN8_PML4ES_PER_PML4 - 1)
> -#define GEN8_PDPE_SHIFT			30
> -/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
> - * tables */
> -#define GEN8_PDPE_MASK			0x1ff
> +#define GEN8_3LVL_PDPES			4
>  
>  #define PPAT_UNCACHED			(_PAGE_PWT | _PAGE_PCD)
>  #define PPAT_CACHED_PDE			0 /* WB LLC */
> @@ -521,15 +510,6 @@ static inline u32 gen6_pde_index(u32 addr)
>  	return i915_pde_index(addr, GEN6_PDE_SHIFT);
>  }
>  
> -static inline unsigned int
> -i915_pdpes_per_pdp(const struct i915_address_space *vm)
> -{
> -	if (i915_vm_is_4lvl(vm))
> -		return GEN8_PML4ES_PER_PML4;
> -
> -	return GEN8_3LVL_PDPES;
> -}
> -
>  static inline struct i915_page_table *
>  i915_pt_entry(const struct i915_page_directory * const pd,
>  	      const unsigned short n)
> @@ -544,66 +524,6 @@ i915_pd_entry(const struct i915_page_directory * const pdp,
>  	return pdp->entry[n];
>  }
>  
> -static inline struct i915_page_directory *
> -i915_pdp_entry(const struct i915_page_directory * const pml4,
> -	       const unsigned short n)
> -{
> -	return pml4->entry[n];
> -}
> -
> -/* Equivalent to the gen6 version, For each pde iterates over every pde
> - * between from start until start + length. On gen8+ it simply iterates
> - * over every page directory entry in a page directory.
> - */
> -#define gen8_for_each_pde(pt, pd, start, length, iter)			\
> -	for (iter = gen8_pde_index(start);				\
> -	     length > 0 && iter < I915_PDES &&				\
> -		     (pt = i915_pt_entry(pd, iter), true);		\
> -	     ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT);		\
> -		    temp = min(temp - start, length);			\
> -		    start += temp, length -= temp; }), ++iter)
> -
> -#define gen8_for_each_pdpe(pd, pdp, start, length, iter)		\
> -	for (iter = gen8_pdpe_index(start);				\
> -	     length > 0 && iter < i915_pdpes_per_pdp(vm) &&		\
> -		     (pd = i915_pd_entry(pdp, iter), true);		\
> -	     ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT);	\
> -		    temp = min(temp - start, length);			\
> -		    start += temp, length -= temp; }), ++iter)
> -
> -#define gen8_for_each_pml4e(pdp, pml4, start, length, iter)		\
> -	for (iter = gen8_pml4e_index(start);				\
> -	     length > 0 && iter < GEN8_PML4ES_PER_PML4 &&		\
> -		     (pdp = i915_pdp_entry(pml4, iter), true);		\
> -	     ({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT);	\
> -		    temp = min(temp - start, length);			\
> -		    start += temp, length -= temp; }), ++iter)
> -
> -static inline u32 gen8_pte_index(u64 address)
> -{
> -	return i915_pte_index(address, GEN8_PDE_SHIFT);
> -}
> -
> -static inline u32 gen8_pde_index(u64 address)
> -{
> -	return i915_pde_index(address, GEN8_PDE_SHIFT);
> -}
> -
> -static inline u32 gen8_pdpe_index(u64 address)
> -{
> -	return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
> -}
> -
> -static inline u32 gen8_pml4e_index(u64 address)
> -{
> -	return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
> -}
> -
> -static inline u64 gen8_pte_count(u64 address, u64 length)
> -{
> -	return i915_pte_count(address, length, GEN8_PDE_SHIFT);
> -}
> -
>  static inline dma_addr_t
>  i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
>  {
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] drm/i915/gtt: Tidy up ppgtt insertion for gen8
  2019-07-16 15:30   ` Abdiel Janulgue
@ 2019-07-16 15:45     ` Chris Wilson
  0 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2019-07-16 15:45 UTC (permalink / raw)
  To: Abdiel Janulgue, intel-gfx

Quoting Abdiel Janulgue (2019-07-16 16:30:09)
> 
> 
> On 12/07/2019 14.27, Chris Wilson wrote:
> > Apply the new radix shift helpers to extract the multi-level indices
> > cleanly when inserting pte into the gtt tree.
> > 
> Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>

Thanks for the reviews,
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2019-07-16 15:45 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-07-12 11:27 ppGTT the recursive wars Chris Wilson
2019-07-12 11:27 ` [PATCH 1/4] drm/i915/gtt: Recursive cleanup for gen8 Chris Wilson
2019-07-12 13:03   ` Abdiel Janulgue
2019-07-12 11:27 ` [PATCH 2/4] drm/i915/gtt: Recursive ppgtt clear " Chris Wilson
2019-07-12 15:16   ` Abdiel Janulgue
2019-07-12 11:27 ` [PATCH 3/4] drm/i915/gtt: Recursive ppgtt alloc " Chris Wilson
2019-07-16 15:28   ` Abdiel Janulgue
2019-07-16 15:28   ` Abdiel Janulgue
2019-07-12 11:27 ` [PATCH 4/4] drm/i915/gtt: Tidy up ppgtt insertion " Chris Wilson
2019-07-16 15:30   ` Abdiel Janulgue
2019-07-16 15:45     ` Chris Wilson
2019-07-12 15:32 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/4] drm/i915/gtt: Recursive cleanup " Patchwork
2019-07-12 15:34 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-07-12 15:52 ` ✓ Fi.CI.BAT: success " Patchwork
2019-07-14  5:07 ` ✓ Fi.CI.IGT: " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.