public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH 4/9] [v4] drm/i915: Make clear/insert vfuncs args absolute
       [not found] <1392925833-6593-1-git-send-email-benjamin.widawsky>
@ 2014-02-26  2:13 ` Ben Widawsky
  2014-02-26  3:52   ` [PATCH] drm/i915: Page table helpers Ben Widawsky
  2014-02-26  7:27   ` [PATCH 4/9] [v4] drm/i915: Make clear/insert vfuncs args absolute Ben Widawsky
  0 siblings, 2 replies; 7+ messages in thread
From: Ben Widawsky @ 2014-02-26  2:13 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

This patch converts insert_entries and clear_range, both functions which
are specific to the VM. These functions tend to encapsulate the gen
specific PTE writes. Passing absolute addresses to the insert_entries,
and clear_range will help make the logic clearer within the functions as
to what's going on. Currently, all callers simply do the appropriate
page shift, which IMO, ends up looking weird with an upcoming change for
the gen8 page table allocations.

Up until now, the PPGTT was a funky 2 level page table. GEN8 changes
this to look more like a 3 level page table, and to that extent we need
a significant amount more memory simply for the page tables. To address
this, the allocations will be split up in finer amounts.

v2: Replace size_t with uint64_t (Chris, Imre)

v3: Fix size in gen8_ppgtt_init (Ben)
Fix Size in i915_gem_suspend_gtt_mappings/restore (Imre)

v4: Wherever length was introduced in the API, the num_entries
calculation should have been (length - start) as opposed to just length.
As this is only a bug on clearing entries, it wasn't easily apparent.
(Ben)

Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h     |  6 +--
 drivers/gpu/drm/i915/i915_gem_gtt.c | 90 +++++++++++++++++++++----------------
 2 files changed, 54 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 57556fb..ab23bfd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -652,12 +652,12 @@ struct i915_address_space {
 				     enum i915_cache_level level,
 				     bool valid); /* Create a valid PTE */
 	void (*clear_range)(struct i915_address_space *vm,
-			    unsigned int first_entry,
-			    unsigned int num_entries,
+			    uint64_t start,
+			    uint64_t length,
 			    bool use_scratch);
 	void (*insert_entries)(struct i915_address_space *vm,
 			       struct sg_table *st,
-			       unsigned int first_entry,
+			       uint64_t start,
 			       enum i915_cache_level cache_level);
 	void (*cleanup)(struct i915_address_space *vm);
 };
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index beca571..23e722b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -254,13 +254,15 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
 }
 
 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
-				   unsigned first_entry,
-				   unsigned num_entries,
+				   uint64_t start,
+				   uint64_t length,
 				   bool use_scratch)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
+	unsigned first_entry = start >> PAGE_SHIFT;
+	unsigned num_entries = (length - start) >> PAGE_SHIFT;
 	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
 	unsigned first_pte = first_entry % GEN8_PTES_PER_PAGE;
 	unsigned last_pte, i;
@@ -290,12 +292,13 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 
 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
-				      unsigned first_entry,
+				      uint64_t start,
 				      enum i915_cache_level cache_level)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen8_gtt_pte_t *pt_vaddr;
+	unsigned first_entry = start >> PAGE_SHIFT;
 	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
 	unsigned act_pte = first_entry % GEN8_PTES_PER_PAGE;
 	struct sg_page_iter sg_iter;
@@ -539,7 +542,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 	ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE;
 
 	ppgtt->base.clear_range(&ppgtt->base, 0,
-				ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE,
+				ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE,
 				true);
 
 	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
@@ -854,13 +857,15 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
 
 /* PPGTT support for Sandybdrige/Gen6 and later */
 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
-				   unsigned first_entry,
-				   unsigned num_entries,
+				   uint64_t start,
+				   uint64_t length,
 				   bool use_scratch)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
+	unsigned first_entry = start >> PAGE_SHIFT;
+	unsigned num_entries = (length - start) >> PAGE_SHIFT;
 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
 	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	unsigned last_pte, i;
@@ -887,12 +892,13 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 
 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
-				      unsigned first_entry,
+				      uint64_t start,
 				      enum i915_cache_level cache_level)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr;
+	unsigned first_entry = start >> PAGE_SHIFT;
 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
 	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	struct sg_page_iter sg_iter;
@@ -1024,8 +1030,7 @@ alloc:
 		ppgtt->pt_dma_addr[i] = pt_addr;
 	}
 
-	ppgtt->base.clear_range(&ppgtt->base, 0,
-				ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true);
+	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
 	ppgtt->debug_dump = gen6_dump_ppgtt;
 
 	DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
@@ -1089,20 +1094,17 @@ ppgtt_bind_vma(struct i915_vma *vma,
 	       enum i915_cache_level cache_level,
 	       u32 flags)
 {
-	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
-
 	WARN_ON(flags);
 
-	vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
+	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
+				cache_level);
 }
 
 static void ppgtt_unbind_vma(struct i915_vma *vma)
 {
-	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
-
 	vma->vm->clear_range(vma->vm,
-			     entry,
-			     vma->obj->base.size >> PAGE_SHIFT,
+			     vma->node.start,
+			     vma->obj->base.size,
 			     true);
 }
 
@@ -1186,8 +1188,8 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
 	i915_check_and_clear_faults(dev);
 
 	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
-				       dev_priv->gtt.base.start / PAGE_SIZE,
-				       dev_priv->gtt.base.total / PAGE_SIZE,
+				       dev_priv->gtt.base.start,
+				       dev_priv->gtt.base.total,
 				       false);
 }
 
@@ -1201,8 +1203,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 
 	/* First fill our portion of the GTT with scratch pages */
 	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
-				       dev_priv->gtt.base.start / PAGE_SIZE,
-				       dev_priv->gtt.base.total / PAGE_SIZE,
+				       dev_priv->gtt.base.start,
+				       dev_priv->gtt.base.total,
 				       true);
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
@@ -1263,10 +1265,11 @@ static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
 
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
-				     unsigned int first_entry,
+				     uint64_t start,
 				     enum i915_cache_level level)
 {
 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
+	unsigned first_entry = start >> PAGE_SHIFT;
 	gen8_gtt_pte_t __iomem *gtt_entries =
 		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
 	int i = 0;
@@ -1308,10 +1311,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
  */
 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
-				     unsigned int first_entry,
+				     uint64_t start,
 				     enum i915_cache_level level)
 {
 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
+	unsigned first_entry = start >> PAGE_SHIFT;
 	gen6_gtt_pte_t __iomem *gtt_entries =
 		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
 	int i = 0;
@@ -1343,11 +1347,13 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 }
 
 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
-				  unsigned int first_entry,
-				  unsigned int num_entries,
+				  uint64_t start,
+				  uint64_t length,
 				  bool use_scratch)
 {
 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
+	unsigned first_entry = start >> PAGE_SHIFT;
+	unsigned num_entries = (length - start) >> PAGE_SHIFT;
 	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
 		(gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
 	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
@@ -1367,11 +1373,13 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 }
 
 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
-				  unsigned int first_entry,
-				  unsigned int num_entries,
+				  uint64_t start,
+				  uint64_t length,
 				  bool use_scratch)
 {
 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
+	unsigned first_entry = start >> PAGE_SHIFT;
+	unsigned num_entries = (length - start) >> PAGE_SHIFT;
 	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
 		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
 	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
@@ -1404,10 +1412,12 @@ static void i915_ggtt_bind_vma(struct i915_vma *vma,
 }
 
 static void i915_ggtt_clear_range(struct i915_address_space *vm,
-				  unsigned int first_entry,
-				  unsigned int num_entries,
+				  uint64_t start,
+				  uint64_t length,
 				  bool unused)
 {
+	unsigned first_entry = start >> PAGE_SHIFT;
+	unsigned num_entries = (length - start) >> PAGE_SHIFT;
 	intel_gtt_clear_range(first_entry, num_entries);
 }
 
@@ -1428,7 +1438,6 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 	struct drm_device *dev = vma->vm->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj = vma->obj;
-	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
 
 	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
 	 * or we have a global mapping already but the cacheability flags have
@@ -1444,7 +1453,8 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
 		if (!obj->has_global_gtt_mapping ||
 		    (cache_level != obj->cache_level)) {
-			vma->vm->insert_entries(vma->vm, obj->pages, entry,
+			vma->vm->insert_entries(vma->vm, obj->pages,
+						vma->node.start,
 						cache_level);
 			obj->has_global_gtt_mapping = 1;
 		}
@@ -1455,7 +1465,9 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 	     (cache_level != obj->cache_level))) {
 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
 		appgtt->base.insert_entries(&appgtt->base,
-					    vma->obj->pages, entry, cache_level);
+					    vma->obj->pages,
+					    vma->node.start,
+					    cache_level);
 		vma->obj->has_aliasing_ppgtt_mapping = 1;
 	}
 }
@@ -1465,11 +1477,11 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
 	struct drm_device *dev = vma->vm->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj = vma->obj;
-	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
 
 	if (obj->has_global_gtt_mapping) {
-		vma->vm->clear_range(vma->vm, entry,
-				     vma->obj->base.size >> PAGE_SHIFT,
+		vma->vm->clear_range(vma->vm,
+				     vma->node.start,
+				     obj->base.size,
 				     true);
 		obj->has_global_gtt_mapping = 0;
 	}
@@ -1477,8 +1489,8 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
 	if (obj->has_aliasing_ppgtt_mapping) {
 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
 		appgtt->base.clear_range(&appgtt->base,
-					 entry,
-					 obj->base.size >> PAGE_SHIFT,
+					 vma->node.start,
+					 obj->base.size,
 					 true);
 		obj->has_aliasing_ppgtt_mapping = 0;
 	}
@@ -1563,14 +1575,14 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
 
 	/* Clear any non-preallocated blocks */
 	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
-		const unsigned long count = (hole_end - hole_start) / PAGE_SIZE;
 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
 			      hole_start, hole_end);
-		ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count, true);
+		ggtt_vm->clear_range(ggtt_vm, hole_start,
+				     hole_end - hole_start, true);
 	}
 
 	/* And finally clear the reserved guard page */
-	ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true);
+	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
 }
 
 void i915_gem_init_global_gtt(struct drm_device *dev)
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH] drm/i915: Page table helpers
  2014-02-26  2:13 ` [PATCH 4/9] [v4] drm/i915: Make clear/insert vfuncs args absolute Ben Widawsky
@ 2014-02-26  3:52   ` Ben Widawsky
  2014-03-10 21:05     ` Imre Deak
  2014-02-26  7:27   ` [PATCH 4/9] [v4] drm/i915: Make clear/insert vfuncs args absolute Ben Widawsky
  1 sibling, 1 reply; 7+ messages in thread
From: Ben Widawsky @ 2014-02-26  3:52 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

These page table helpers make the code much cleaner. There is some
room to use the arch/x86 header files. The reason I've opted not to is
in several cases, the definitions are dictated by the CONFIG_ options
which do not always indicate the restrictions in the GPU.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---

I have this patch queued up for the next round of /stuff/ I am working on. If
you want to pull it into this series, it's fine by me. As I deal with the code
more, it does become more obvious what looks good, and what does not.

---

 drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++++++++++++++++-----------
 1 file changed, 81 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index aa3ef7f..43d9129 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -30,8 +30,6 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
-#define GEN6_PPGTT_PD_ENTRIES 512
-#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
 typedef uint64_t gen8_gtt_pte_t;
 typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 
@@ -51,6 +49,27 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
 #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
 
+/* GEN6 PPGTT resembles a 2 level page table:
+ * 31:22 | 21:12 |  11:0
+ *  PDE  |  PTE  | offset
+ */
+#define GEN6_PDE_SHIFT			22
+#define GEN6_PPGTT_PD_ENTRIES		512
+#define GEN6_PDE_MASK			(GEN6_PPGTT_PD_ENTRIES-1)
+#define GEN6_PTE_SHIFT			12
+#define GEN6_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
+#define GEN6_PTE_MASK			(GEN6_PPGTT_PT_ENTRIES-1)
+
+static inline uint32_t gen6_pte_index(uint32_t address)
+{
+	return (address >> GEN6_PTE_SHIFT) & GEN6_PTE_MASK;
+}
+
+static inline uint32_t gen6_pde_index(uint32_t address)
+{
+	return (address >> GEN6_PDE_SHIFT) & GEN6_PDE_MASK;
+}
+
 /* Cacheability Control is a 4-bit value. The low three bits are stored in *
  * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
  */
@@ -63,6 +82,11 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
 #define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
 
+#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
+#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
+#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
+#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
+
 #define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
 #define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
 
@@ -71,6 +95,10 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
  * PDPE  |  PDE  |  PTE  | offset
  * The difference as compared to normal x86 3 level page table is the PDPEs are
  * programmed via register.
+ *
+ * The x86 pagetable code is flexible in its ability to handle varying page
+ * table depths via abstracted PGDIR/PUD/PMD/PTE. I've opted to not do this and
+ * instead replicate the interesting functionality.
  */
 #define GEN8_PDPE_SHIFT			30
 #define GEN8_PDPE_MASK			0x3
@@ -79,10 +107,31 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define GEN8_PTE_SHIFT			12
 #define GEN8_PTE_MASK			0x1ff
 
-#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
-#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
-#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
-#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
+static inline uint32_t gen8_pte_index(uint64_t address)
+{
+	return (address >> GEN8_PTE_SHIFT) & GEN8_PTE_MASK;
+}
+
+static inline uint32_t gen8_pde_index(uint64_t address)
+{
+	return (address >> GEN8_PDE_SHIFT) & GEN8_PDE_MASK;
+}
+
+static inline uint32_t gen8_pdpe_index(uint64_t address)
+{
+	return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
+}
+
+static inline uint32_t gen8_pml4e_index(uint64_t address)
+{
+	BUG();
+}
+
+/* Useful for 3 level page table functions */
+static inline uint32_t gen8_pde_count(uint64_t start, uint64_t length)
+{
+	return ((length - start) / PAGE_SIZE) << GEN8_PDE_SHIFT;
+}
 
 static void ppgtt_bind_vma(struct i915_vma *vma,
 			   enum i915_cache_level cache_level,
@@ -274,9 +323,9 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
-	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
-	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
-	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
+	unsigned pdpe = gen8_pdpe_index(start);
+	unsigned pde = gen8_pde_index(start);
+	unsigned pte = gen8_pte_index(start);
 	unsigned num_entries = (length - start) >> PAGE_SHIFT;
 	unsigned last_pte, i;
 
@@ -315,9 +364,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen8_gtt_pte_t *pt_vaddr;
-	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
-	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
-	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
+	unsigned pdpe = gen8_pdpe_index(start);
+	unsigned pde = gen8_pde_index(start);
+	unsigned pte = gen8_pte_index(start);
 	struct sg_page_iter sg_iter;
 
 	pt_vaddr = NULL;
@@ -661,9 +710,9 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 		seq_printf(m, "\tPDE: %x\n", pd_entry);
 
 		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
-		for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
+		for (pte = 0; pte < GEN6_PPGTT_PT_ENTRIES; pte+=4) {
 			unsigned long va =
-				(pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
+				(pde * PAGE_SIZE * GEN6_PPGTT_PT_ENTRIES) +
 				(pte * PAGE_SIZE);
 			int i;
 			bool found = false;
@@ -938,29 +987,28 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
-	unsigned first_entry = start >> PAGE_SHIFT;
+	unsigned pde = gen6_pde_index(start);
 	unsigned num_entries = (length - start) >> PAGE_SHIFT;
-	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
-	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+	unsigned pte = gen6_pte_index(start);
 	unsigned last_pte, i;
 
 	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
 
 	while (num_entries) {
-		last_pte = first_pte + num_entries;
-		if (last_pte > I915_PPGTT_PT_ENTRIES)
-			last_pte = I915_PPGTT_PT_ENTRIES;
+		last_pte = pte + num_entries;
+		if (last_pte > GEN6_PPGTT_PT_ENTRIES)
+			last_pte = GEN6_PPGTT_PT_ENTRIES;
 
-		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
+		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
 
-		for (i = first_pte; i < last_pte; i++)
+		for (i = pte; i < last_pte; i++)
 			pt_vaddr[i] = scratch_pte;
 
 		kunmap_atomic(pt_vaddr);
 
-		num_entries -= last_pte - first_pte;
-		first_pte = 0;
-		act_pt++;
+		num_entries -= last_pte - pte;
+		pte = 0;
+		pde++;
 	}
 }
 
@@ -972,24 +1020,23 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr;
-	unsigned first_entry = start >> PAGE_SHIFT;
-	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
-	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+	unsigned pde = gen6_pde_index(start);
+	unsigned pte = gen6_pte_index(start);
 	struct sg_page_iter sg_iter;
 
 	pt_vaddr = NULL;
 	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
 		if (pt_vaddr == NULL)
-			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
+			pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
 
-		pt_vaddr[act_pte] =
+		pt_vaddr[pte] =
 			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
 				       cache_level, true);
-		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
+		if (++pte == GEN6_PPGTT_PT_ENTRIES) {
 			kunmap_atomic(pt_vaddr);
 			pt_vaddr = NULL;
-			act_pt++;
-			act_pte = 0;
+			pde++;
+			pte = 0;
 		}
 	}
 	if (pt_vaddr)
@@ -1173,7 +1220,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
 	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
 	ppgtt->base.start = 0;
-	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
+	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * GEN6_PPGTT_PT_ENTRIES * PAGE_SIZE;
 	ppgtt->debug_dump = gen6_dump_ppgtt;
 
 	ppgtt->pd_offset =
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 4/9] [v4] drm/i915: Make clear/insert vfuncs args absolute
  2014-02-26  2:13 ` [PATCH 4/9] [v4] drm/i915: Make clear/insert vfuncs args absolute Ben Widawsky
  2014-02-26  3:52   ` [PATCH] drm/i915: Page table helpers Ben Widawsky
@ 2014-02-26  7:27   ` Ben Widawsky
  2014-02-26  7:36     ` Ben Widawsky
  1 sibling, 1 reply; 7+ messages in thread
From: Ben Widawsky @ 2014-02-26  7:27 UTC (permalink / raw)
  Cc: Intel GFX

On Tue, Feb 25, 2014 at 06:13:44PM -0800, Ben Widawsky wrote:
> This patch converts insert_entries and clear_range, both functions which
> are specific to the VM. These functions tend to encapsulate the gen
> specific PTE writes. Passing absolute addresses to the insert_entries,
> and clear_range will help make the logic clearer within the functions as
> to what's going on. Currently, all callers simply do the appropriate
> page shift, which IMO, ends up looking weird with an upcoming change for
> the gen8 page table allocations.
> 
> Up until now, the PPGTT was a funky 2 level page table. GEN8 changes
> this to look more like a 3 level page table, and to that extent we need
> a significant amount more memory simply for the page tables. To address
> this, the allocations will be split up in finer amounts.
> 
> v2: Replace size_t with uint64_t (Chris, Imre)
> 
> v3: Fix size in gen8_ppgtt_init (Ben)
> Fix Size in i915_gem_suspend_gtt_mappings/restore (Imre)
> 
> v4: Wherever length was introduced in the API, the num_entries
> calculation should have been (length - start) as opposed to just length.
> As this is only a bug on clearing entries, it wasn't easily apparent.
> (Ben)
> 
> Reviewed-by: Imre Deak <imre.deak@intel.com>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

Give me a sec on this one.... something is still busted.

> ---
>  drivers/gpu/drm/i915/i915_drv.h     |  6 +--
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 90 +++++++++++++++++++++----------------
>  2 files changed, 54 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 57556fb..ab23bfd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -652,12 +652,12 @@ struct i915_address_space {
>  				     enum i915_cache_level level,
>  				     bool valid); /* Create a valid PTE */
>  	void (*clear_range)(struct i915_address_space *vm,
> -			    unsigned int first_entry,
> -			    unsigned int num_entries,
> +			    uint64_t start,
> +			    uint64_t length,
>  			    bool use_scratch);
>  	void (*insert_entries)(struct i915_address_space *vm,
>  			       struct sg_table *st,
> -			       unsigned int first_entry,
> +			       uint64_t start,
>  			       enum i915_cache_level cache_level);
>  	void (*cleanup)(struct i915_address_space *vm);
>  };
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index beca571..23e722b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -254,13 +254,15 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  }
>  
>  static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
> -				   unsigned first_entry,
> -				   unsigned num_entries,
> +				   uint64_t start,
> +				   uint64_t length,
>  				   bool use_scratch)
>  {
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
>  	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
> +	unsigned first_entry = start >> PAGE_SHIFT;
> +	unsigned num_entries = (length - start) >> PAGE_SHIFT;
>  	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
>  	unsigned first_pte = first_entry % GEN8_PTES_PER_PAGE;
>  	unsigned last_pte, i;
> @@ -290,12 +292,13 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
>  
>  static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
>  				      struct sg_table *pages,
> -				      unsigned first_entry,
> +				      uint64_t start,
>  				      enum i915_cache_level cache_level)
>  {
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
>  	gen8_gtt_pte_t *pt_vaddr;
> +	unsigned first_entry = start >> PAGE_SHIFT;
>  	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
>  	unsigned act_pte = first_entry % GEN8_PTES_PER_PAGE;
>  	struct sg_page_iter sg_iter;
> @@ -539,7 +542,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
>  	ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE;
>  
>  	ppgtt->base.clear_range(&ppgtt->base, 0,
> -				ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE,
> +				ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE,
>  				true);
>  
>  	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
> @@ -854,13 +857,15 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
>  
>  /* PPGTT support for Sandybdrige/Gen6 and later */
>  static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
> -				   unsigned first_entry,
> -				   unsigned num_entries,
> +				   uint64_t start,
> +				   uint64_t length,
>  				   bool use_scratch)
>  {
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
>  	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
> +	unsigned first_entry = start >> PAGE_SHIFT;
> +	unsigned num_entries = (length - start) >> PAGE_SHIFT;
>  	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
>  	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
>  	unsigned last_pte, i;
> @@ -887,12 +892,13 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  
>  static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>  				      struct sg_table *pages,
> -				      unsigned first_entry,
> +				      uint64_t start,
>  				      enum i915_cache_level cache_level)
>  {
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
>  	gen6_gtt_pte_t *pt_vaddr;
> +	unsigned first_entry = start >> PAGE_SHIFT;
>  	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
>  	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
>  	struct sg_page_iter sg_iter;
> @@ -1024,8 +1030,7 @@ alloc:
>  		ppgtt->pt_dma_addr[i] = pt_addr;
>  	}
>  
> -	ppgtt->base.clear_range(&ppgtt->base, 0,
> -				ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true);
> +	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
>  	ppgtt->debug_dump = gen6_dump_ppgtt;
>  
>  	DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
> @@ -1089,20 +1094,17 @@ ppgtt_bind_vma(struct i915_vma *vma,
>  	       enum i915_cache_level cache_level,
>  	       u32 flags)
>  {
> -	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
> -
>  	WARN_ON(flags);
>  
> -	vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
> +	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
> +				cache_level);
>  }
>  
>  static void ppgtt_unbind_vma(struct i915_vma *vma)
>  {
> -	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
> -
>  	vma->vm->clear_range(vma->vm,
> -			     entry,
> -			     vma->obj->base.size >> PAGE_SHIFT,
> +			     vma->node.start,
> +			     vma->obj->base.size,
>  			     true);
>  }
>  
> @@ -1186,8 +1188,8 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
>  	i915_check_and_clear_faults(dev);
>  
>  	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
> -				       dev_priv->gtt.base.start / PAGE_SIZE,
> -				       dev_priv->gtt.base.total / PAGE_SIZE,
> +				       dev_priv->gtt.base.start,
> +				       dev_priv->gtt.base.total,
>  				       false);
>  }
>  
> @@ -1201,8 +1203,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
>  
>  	/* First fill our portion of the GTT with scratch pages */
>  	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
> -				       dev_priv->gtt.base.start / PAGE_SIZE,
> -				       dev_priv->gtt.base.total / PAGE_SIZE,
> +				       dev_priv->gtt.base.start,
> +				       dev_priv->gtt.base.total,
>  				       true);
>  
>  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> @@ -1263,10 +1265,11 @@ static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
>  
>  static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>  				     struct sg_table *st,
> -				     unsigned int first_entry,
> +				     uint64_t start,
>  				     enum i915_cache_level level)
>  {
>  	struct drm_i915_private *dev_priv = vm->dev->dev_private;
> +	unsigned first_entry = start >> PAGE_SHIFT;
>  	gen8_gtt_pte_t __iomem *gtt_entries =
>  		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
>  	int i = 0;
> @@ -1308,10 +1311,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>   */
>  static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>  				     struct sg_table *st,
> -				     unsigned int first_entry,
> +				     uint64_t start,
>  				     enum i915_cache_level level)
>  {
>  	struct drm_i915_private *dev_priv = vm->dev->dev_private;
> +	unsigned first_entry = start >> PAGE_SHIFT;
>  	gen6_gtt_pte_t __iomem *gtt_entries =
>  		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
>  	int i = 0;
> @@ -1343,11 +1347,13 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>  }
>  
>  static void gen8_ggtt_clear_range(struct i915_address_space *vm,
> -				  unsigned int first_entry,
> -				  unsigned int num_entries,
> +				  uint64_t start,
> +				  uint64_t length,
>  				  bool use_scratch)
>  {
>  	struct drm_i915_private *dev_priv = vm->dev->dev_private;
> +	unsigned first_entry = start >> PAGE_SHIFT;
> +	unsigned num_entries = (length - start) >> PAGE_SHIFT;
>  	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
>  		(gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
>  	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
> @@ -1367,11 +1373,13 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
>  }
>  
>  static void gen6_ggtt_clear_range(struct i915_address_space *vm,
> -				  unsigned int first_entry,
> -				  unsigned int num_entries,
> +				  uint64_t start,
> +				  uint64_t length,
>  				  bool use_scratch)
>  {
>  	struct drm_i915_private *dev_priv = vm->dev->dev_private;
> +	unsigned first_entry = start >> PAGE_SHIFT;
> +	unsigned num_entries = (length - start) >> PAGE_SHIFT;
>  	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
>  		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
>  	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
> @@ -1404,10 +1412,12 @@ static void i915_ggtt_bind_vma(struct i915_vma *vma,
>  }
>  
>  static void i915_ggtt_clear_range(struct i915_address_space *vm,
> -				  unsigned int first_entry,
> -				  unsigned int num_entries,
> +				  uint64_t start,
> +				  uint64_t length,
>  				  bool unused)
>  {
> +	unsigned first_entry = start >> PAGE_SHIFT;
> +	unsigned num_entries = (length - start) >> PAGE_SHIFT;
>  	intel_gtt_clear_range(first_entry, num_entries);
>  }
>  
> @@ -1428,7 +1438,6 @@ static void ggtt_bind_vma(struct i915_vma *vma,
>  	struct drm_device *dev = vma->vm->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_gem_object *obj = vma->obj;
> -	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
>  
>  	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
>  	 * or we have a global mapping already but the cacheability flags have
> @@ -1444,7 +1453,8 @@ static void ggtt_bind_vma(struct i915_vma *vma,
>  	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
>  		if (!obj->has_global_gtt_mapping ||
>  		    (cache_level != obj->cache_level)) {
> -			vma->vm->insert_entries(vma->vm, obj->pages, entry,
> +			vma->vm->insert_entries(vma->vm, obj->pages,
> +						vma->node.start,
>  						cache_level);
>  			obj->has_global_gtt_mapping = 1;
>  		}
> @@ -1455,7 +1465,9 @@ static void ggtt_bind_vma(struct i915_vma *vma,
>  	     (cache_level != obj->cache_level))) {
>  		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
>  		appgtt->base.insert_entries(&appgtt->base,
> -					    vma->obj->pages, entry, cache_level);
> +					    vma->obj->pages,
> +					    vma->node.start,
> +					    cache_level);
>  		vma->obj->has_aliasing_ppgtt_mapping = 1;
>  	}
>  }
> @@ -1465,11 +1477,11 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
>  	struct drm_device *dev = vma->vm->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_gem_object *obj = vma->obj;
> -	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
>  
>  	if (obj->has_global_gtt_mapping) {
> -		vma->vm->clear_range(vma->vm, entry,
> -				     vma->obj->base.size >> PAGE_SHIFT,
> +		vma->vm->clear_range(vma->vm,
> +				     vma->node.start,
> +				     obj->base.size,
>  				     true);
>  		obj->has_global_gtt_mapping = 0;
>  	}
> @@ -1477,8 +1489,8 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
>  	if (obj->has_aliasing_ppgtt_mapping) {
>  		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
>  		appgtt->base.clear_range(&appgtt->base,
> -					 entry,
> -					 obj->base.size >> PAGE_SHIFT,
> +					 vma->node.start,
> +					 obj->base.size,
>  					 true);
>  		obj->has_aliasing_ppgtt_mapping = 0;
>  	}
> @@ -1563,14 +1575,14 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
>  
>  	/* Clear any non-preallocated blocks */
>  	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
> -		const unsigned long count = (hole_end - hole_start) / PAGE_SIZE;
>  		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
>  			      hole_start, hole_end);
> -		ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count, true);
> +		ggtt_vm->clear_range(ggtt_vm, hole_start,
> +				     hole_end - hole_start, true);
>  	}
>  
>  	/* And finally clear the reserved guard page */
> -	ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true);
> +	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
>  }
>  
>  void i915_gem_init_global_gtt(struct drm_device *dev)
> -- 
> 1.9.0
> 

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 4/9] [v4] drm/i915: Make clear/insert vfuncs args absolute
  2014-02-26  7:27   ` [PATCH 4/9] [v4] drm/i915: Make clear/insert vfuncs args absolute Ben Widawsky
@ 2014-02-26  7:36     ` Ben Widawsky
  2014-03-05 14:28       ` Daniel Vetter
  0 siblings, 1 reply; 7+ messages in thread
From: Ben Widawsky @ 2014-02-26  7:36 UTC (permalink / raw)
  To: Intel GFX

On Tue, Feb 25, 2014 at 11:27:15PM -0800, Ben Widawsky wrote:
> On Tue, Feb 25, 2014 at 06:13:44PM -0800, Ben Widawsky wrote:
> > This patch converts insert_entries and clear_range, both functions which
> > are specific to the VM. These functions tend to encapsulate the gen
> > specific PTE writes. Passing absolute addresses to the insert_entries,
> > and clear_range will help make the logic clearer within the functions as
> > to what's going on. Currently, all callers simply do the appropriate
> > page shift, which IMO, ends up looking weird with an upcoming change for
> > the gen8 page table allocations.
> > 
> > Up until now, the PPGTT was a funky 2 level page table. GEN8 changes
> > this to look more like a 3 level page table, and to that extent we need
> > a significant amount more memory simply for the page tables. To address
> > this, the allocations will be split up in finer amounts.
> > 
> > v2: Replace size_t with uint64_t (Chris, Imre)
> > 
> > v3: Fix size in gen8_ppgtt_init (Ben)
> > Fix Size in i915_gem_suspend_gtt_mappings/restore (Imre)
> > 
> > v4: Wherever length was introduced in the API, the num_entries
> > calculation should have been (length - start) as opposed to just length.
> > As this is only a bug on clearing entries, it wasn't easily apparent.
> > (Ben)
> > 
> > Reviewed-by: Imre Deak <imre.deak@intel.com>
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> 
> Give me a sec on this one.... something is still busted.
> 

Ah, yes. v4 is stupid. v3 is still the right one. I got confused by some
future changes I made. Please ignore v4.

> > ---
> >  drivers/gpu/drm/i915/i915_drv.h     |  6 +--
> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 90 +++++++++++++++++++++----------------
> >  2 files changed, 54 insertions(+), 42 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 57556fb..ab23bfd 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -652,12 +652,12 @@ struct i915_address_space {
> >  				     enum i915_cache_level level,
> >  				     bool valid); /* Create a valid PTE */
> >  	void (*clear_range)(struct i915_address_space *vm,
> > -			    unsigned int first_entry,
> > -			    unsigned int num_entries,
> > +			    uint64_t start,
> > +			    uint64_t length,
> >  			    bool use_scratch);
> >  	void (*insert_entries)(struct i915_address_space *vm,
> >  			       struct sg_table *st,
> > -			       unsigned int first_entry,
> > +			       uint64_t start,
> >  			       enum i915_cache_level cache_level);
> >  	void (*cleanup)(struct i915_address_space *vm);
> >  };
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index beca571..23e722b 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -254,13 +254,15 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
> >  }
> >  
> >  static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
> > -				   unsigned first_entry,
> > -				   unsigned num_entries,
> > +				   uint64_t start,
> > +				   uint64_t length,
> >  				   bool use_scratch)
> >  {
> >  	struct i915_hw_ppgtt *ppgtt =
> >  		container_of(vm, struct i915_hw_ppgtt, base);
> >  	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
> > +	unsigned first_entry = start >> PAGE_SHIFT;
> > +	unsigned num_entries = (length - start) >> PAGE_SHIFT;
> >  	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
> >  	unsigned first_pte = first_entry % GEN8_PTES_PER_PAGE;
> >  	unsigned last_pte, i;
> > @@ -290,12 +292,13 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
> >  
> >  static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
> >  				      struct sg_table *pages,
> > -				      unsigned first_entry,
> > +				      uint64_t start,
> >  				      enum i915_cache_level cache_level)
> >  {
> >  	struct i915_hw_ppgtt *ppgtt =
> >  		container_of(vm, struct i915_hw_ppgtt, base);
> >  	gen8_gtt_pte_t *pt_vaddr;
> > +	unsigned first_entry = start >> PAGE_SHIFT;
> >  	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
> >  	unsigned act_pte = first_entry % GEN8_PTES_PER_PAGE;
> >  	struct sg_page_iter sg_iter;
> > @@ -539,7 +542,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
> >  	ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE;
> >  
> >  	ppgtt->base.clear_range(&ppgtt->base, 0,
> > -				ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE,
> > +				ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE,
> >  				true);
> >  
> >  	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
> > @@ -854,13 +857,15 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
> >  
> >  /* PPGTT support for Sandybdrige/Gen6 and later */
> >  static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
> > -				   unsigned first_entry,
> > -				   unsigned num_entries,
> > +				   uint64_t start,
> > +				   uint64_t length,
> >  				   bool use_scratch)
> >  {
> >  	struct i915_hw_ppgtt *ppgtt =
> >  		container_of(vm, struct i915_hw_ppgtt, base);
> >  	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
> > +	unsigned first_entry = start >> PAGE_SHIFT;
> > +	unsigned num_entries = (length - start) >> PAGE_SHIFT;
> >  	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
> >  	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> >  	unsigned last_pte, i;
> > @@ -887,12 +892,13 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
> >  
> >  static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
> >  				      struct sg_table *pages,
> > -				      unsigned first_entry,
> > +				      uint64_t start,
> >  				      enum i915_cache_level cache_level)
> >  {
> >  	struct i915_hw_ppgtt *ppgtt =
> >  		container_of(vm, struct i915_hw_ppgtt, base);
> >  	gen6_gtt_pte_t *pt_vaddr;
> > +	unsigned first_entry = start >> PAGE_SHIFT;
> >  	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
> >  	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> >  	struct sg_page_iter sg_iter;
> > @@ -1024,8 +1030,7 @@ alloc:
> >  		ppgtt->pt_dma_addr[i] = pt_addr;
> >  	}
> >  
> > -	ppgtt->base.clear_range(&ppgtt->base, 0,
> > -				ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true);
> > +	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
> >  	ppgtt->debug_dump = gen6_dump_ppgtt;
> >  
> >  	DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
> > @@ -1089,20 +1094,17 @@ ppgtt_bind_vma(struct i915_vma *vma,
> >  	       enum i915_cache_level cache_level,
> >  	       u32 flags)
> >  {
> > -	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
> > -
> >  	WARN_ON(flags);
> >  
> > -	vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
> > +	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
> > +				cache_level);
> >  }
> >  
> >  static void ppgtt_unbind_vma(struct i915_vma *vma)
> >  {
> > -	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
> > -
> >  	vma->vm->clear_range(vma->vm,
> > -			     entry,
> > -			     vma->obj->base.size >> PAGE_SHIFT,
> > +			     vma->node.start,
> > +			     vma->obj->base.size,
> >  			     true);
> >  }
> >  
> > @@ -1186,8 +1188,8 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
> >  	i915_check_and_clear_faults(dev);
> >  
> >  	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
> > -				       dev_priv->gtt.base.start / PAGE_SIZE,
> > -				       dev_priv->gtt.base.total / PAGE_SIZE,
> > +				       dev_priv->gtt.base.start,
> > +				       dev_priv->gtt.base.total,
> >  				       false);
> >  }
> >  
> > @@ -1201,8 +1203,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
> >  
> >  	/* First fill our portion of the GTT with scratch pages */
> >  	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
> > -				       dev_priv->gtt.base.start / PAGE_SIZE,
> > -				       dev_priv->gtt.base.total / PAGE_SIZE,
> > +				       dev_priv->gtt.base.start,
> > +				       dev_priv->gtt.base.total,
> >  				       true);
> >  
> >  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> > @@ -1263,10 +1265,11 @@ static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
> >  
> >  static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
> >  				     struct sg_table *st,
> > -				     unsigned int first_entry,
> > +				     uint64_t start,
> >  				     enum i915_cache_level level)
> >  {
> >  	struct drm_i915_private *dev_priv = vm->dev->dev_private;
> > +	unsigned first_entry = start >> PAGE_SHIFT;
> >  	gen8_gtt_pte_t __iomem *gtt_entries =
> >  		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
> >  	int i = 0;
> > @@ -1308,10 +1311,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
> >   */
> >  static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
> >  				     struct sg_table *st,
> > -				     unsigned int first_entry,
> > +				     uint64_t start,
> >  				     enum i915_cache_level level)
> >  {
> >  	struct drm_i915_private *dev_priv = vm->dev->dev_private;
> > +	unsigned first_entry = start >> PAGE_SHIFT;
> >  	gen6_gtt_pte_t __iomem *gtt_entries =
> >  		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
> >  	int i = 0;
> > @@ -1343,11 +1347,13 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
> >  }
> >  
> >  static void gen8_ggtt_clear_range(struct i915_address_space *vm,
> > -				  unsigned int first_entry,
> > -				  unsigned int num_entries,
> > +				  uint64_t start,
> > +				  uint64_t length,
> >  				  bool use_scratch)
> >  {
> >  	struct drm_i915_private *dev_priv = vm->dev->dev_private;
> > +	unsigned first_entry = start >> PAGE_SHIFT;
> > +	unsigned num_entries = (length - start) >> PAGE_SHIFT;
> >  	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
> >  		(gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
> >  	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
> > @@ -1367,11 +1373,13 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
> >  }
> >  
> >  static void gen6_ggtt_clear_range(struct i915_address_space *vm,
> > -				  unsigned int first_entry,
> > -				  unsigned int num_entries,
> > +				  uint64_t start,
> > +				  uint64_t length,
> >  				  bool use_scratch)
> >  {
> >  	struct drm_i915_private *dev_priv = vm->dev->dev_private;
> > +	unsigned first_entry = start >> PAGE_SHIFT;
> > +	unsigned num_entries = (length - start) >> PAGE_SHIFT;
> >  	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
> >  		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
> >  	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
> > @@ -1404,10 +1412,12 @@ static void i915_ggtt_bind_vma(struct i915_vma *vma,
> >  }
> >  
> >  static void i915_ggtt_clear_range(struct i915_address_space *vm,
> > -				  unsigned int first_entry,
> > -				  unsigned int num_entries,
> > +				  uint64_t start,
> > +				  uint64_t length,
> >  				  bool unused)
> >  {
> > +	unsigned first_entry = start >> PAGE_SHIFT;
> > +	unsigned num_entries = (length - start) >> PAGE_SHIFT;
> >  	intel_gtt_clear_range(first_entry, num_entries);
> >  }
> >  
> > @@ -1428,7 +1438,6 @@ static void ggtt_bind_vma(struct i915_vma *vma,
> >  	struct drm_device *dev = vma->vm->dev;
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >  	struct drm_i915_gem_object *obj = vma->obj;
> > -	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
> >  
> >  	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
> >  	 * or we have a global mapping already but the cacheability flags have
> > @@ -1444,7 +1453,8 @@ static void ggtt_bind_vma(struct i915_vma *vma,
> >  	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
> >  		if (!obj->has_global_gtt_mapping ||
> >  		    (cache_level != obj->cache_level)) {
> > -			vma->vm->insert_entries(vma->vm, obj->pages, entry,
> > +			vma->vm->insert_entries(vma->vm, obj->pages,
> > +						vma->node.start,
> >  						cache_level);
> >  			obj->has_global_gtt_mapping = 1;
> >  		}
> > @@ -1455,7 +1465,9 @@ static void ggtt_bind_vma(struct i915_vma *vma,
> >  	     (cache_level != obj->cache_level))) {
> >  		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
> >  		appgtt->base.insert_entries(&appgtt->base,
> > -					    vma->obj->pages, entry, cache_level);
> > +					    vma->obj->pages,
> > +					    vma->node.start,
> > +					    cache_level);
> >  		vma->obj->has_aliasing_ppgtt_mapping = 1;
> >  	}
> >  }
> > @@ -1465,11 +1477,11 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
> >  	struct drm_device *dev = vma->vm->dev;
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >  	struct drm_i915_gem_object *obj = vma->obj;
> > -	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
> >  
> >  	if (obj->has_global_gtt_mapping) {
> > -		vma->vm->clear_range(vma->vm, entry,
> > -				     vma->obj->base.size >> PAGE_SHIFT,
> > +		vma->vm->clear_range(vma->vm,
> > +				     vma->node.start,
> > +				     obj->base.size,
> >  				     true);
> >  		obj->has_global_gtt_mapping = 0;
> >  	}
> > @@ -1477,8 +1489,8 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
> >  	if (obj->has_aliasing_ppgtt_mapping) {
> >  		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
> >  		appgtt->base.clear_range(&appgtt->base,
> > -					 entry,
> > -					 obj->base.size >> PAGE_SHIFT,
> > +					 vma->node.start,
> > +					 obj->base.size,
> >  					 true);
> >  		obj->has_aliasing_ppgtt_mapping = 0;
> >  	}
> > @@ -1563,14 +1575,14 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
> >  
> >  	/* Clear any non-preallocated blocks */
> >  	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
> > -		const unsigned long count = (hole_end - hole_start) / PAGE_SIZE;
> >  		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
> >  			      hole_start, hole_end);
> > -		ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count, true);
> > +		ggtt_vm->clear_range(ggtt_vm, hole_start,
> > +				     hole_end - hole_start, true);
> >  	}
> >  
> >  	/* And finally clear the reserved guard page */
> > -	ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true);
> > +	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
> >  }
> >  
> >  void i915_gem_init_global_gtt(struct drm_device *dev)
> > -- 
> > 1.9.0
> > 
> 
> -- 
> Ben Widawsky, Intel Open Source Technology Center
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 4/9] [v4] drm/i915: Make clear/insert vfuncs args absolute
  2014-02-26  7:36     ` Ben Widawsky
@ 2014-03-05 14:28       ` Daniel Vetter
  0 siblings, 0 replies; 7+ messages in thread
From: Daniel Vetter @ 2014-03-05 14:28 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Intel GFX

On Tue, Feb 25, 2014 at 11:36:15PM -0800, Ben Widawsky wrote:
> On Tue, Feb 25, 2014 at 11:27:15PM -0800, Ben Widawsky wrote:
> > On Tue, Feb 25, 2014 at 06:13:44PM -0800, Ben Widawsky wrote:
> > > This patch converts insert_entries and clear_range, both functions which
> > > are specific to the VM. These functions tend to encapsulate the gen
> > > specific PTE writes. Passing absolute addresses to the insert_entries,
> > > and clear_range will help make the logic clearer within the functions as
> > > to what's going on. Currently, all callers simply do the appropriate
> > > page shift, which IMO, ends up looking weird with an upcoming change for
> > > the gen8 page table allocations.
> > > 
> > > Up until now, the PPGTT was a funky 2 level page table. GEN8 changes
> > > this to look more like a 3 level page table, and to that extent we need
> > > a significant amount more memory simply for the page tables. To address
> > > this, the allocations will be split up in finer amounts.
> > > 
> > > v2: Replace size_t with uint64_t (Chris, Imre)
> > > 
> > > v3: Fix size in gen8_ppgtt_init (Ben)
> > > Fix Size in i915_gem_suspend_gtt_mappings/restore (Imre)
> > > 
> > > v4: Wherever length was introduced in the API, the num_entries
> > > calculation should have been (length - start) as opposed to just length.
> > > As this is only a bug on clearing entries, it wasn't easily apparent.
> > > (Ben)
> > > 
> > > Reviewed-by: Imre Deak <imre.deak@intel.com>
> > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > 
> > Give me a sec on this one.... something is still busted.
> > 
> 
> Ah, yes. v4 is stupid. v3 is still the right one. I got confused by some
> future changes I made. Please ignore v4.

Well num_entries = length - start should ring all available alarm clocks.
Either you want num_entries = length or s/length/end/. length - start
makes absolutely no sense at all ;-)

Cheers, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/i915: Page table helpers
  2014-02-26  3:52   ` [PATCH] drm/i915: Page table helpers Ben Widawsky
@ 2014-03-10 21:05     ` Imre Deak
  2014-03-10 21:37       ` Ben Widawsky
  0 siblings, 1 reply; 7+ messages in thread
From: Imre Deak @ 2014-03-10 21:05 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky

On Tue, 2014-02-25 at 19:52 -0800, Ben Widawsky wrote:
> These page table helpers make the code much cleaner. There is some
> room to use the arch/x86 header files. The reason I've opted not to is
> in several cases, the definitions are dictated by the CONFIG_ options
> which do not always indicate the restrictions in the GPU.
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---
> 
> I have this patch queued up for the next round of /stuff/ I am working on. If
> you want to pull it into this series, it's fine by me. As I deal with the code
> more, it does become more obvious what looks good, and what does not.
> 
> ---
> 
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++++++++++++++++-----------
>  1 file changed, 81 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index aa3ef7f..43d9129 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -30,8 +30,6 @@
>  #include "i915_trace.h"
>  #include "intel_drv.h"
>  
> -#define GEN6_PPGTT_PD_ENTRIES 512
> -#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
>  typedef uint64_t gen8_gtt_pte_t;
>  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>  
> @@ -51,6 +49,27 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>  #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
>  #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
>  
> +/* GEN6 PPGTT resembles a 2 level page table:
> + * 31:22 | 21:12 |  11:0
> + *  PDE  |  PTE  | offset
> + */
> +#define GEN6_PDE_SHIFT			22
> +#define GEN6_PPGTT_PD_ENTRIES		512
> +#define GEN6_PDE_MASK			(GEN6_PPGTT_PD_ENTRIES-1)
> +#define GEN6_PTE_SHIFT			12
> +#define GEN6_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> +#define GEN6_PTE_MASK			(GEN6_PPGTT_PT_ENTRIES-1)
> +
> +static inline uint32_t gen6_pte_index(uint32_t address)
> +{
> +	return (address >> GEN6_PTE_SHIFT) & GEN6_PTE_MASK;
> +}
> +
> +static inline uint32_t gen6_pde_index(uint32_t address)
> +{
> +	return (address >> GEN6_PDE_SHIFT) & GEN6_PDE_MASK;
> +}
> +
>  /* Cacheability Control is a 4-bit value. The low three bits are stored in *
>   * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
>   */
> @@ -63,6 +82,11 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>  #define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
>  #define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
>  
> +#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
> +#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
> +#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
> +#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
> +
>  #define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
>  #define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
>  
> @@ -71,6 +95,10 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>   * PDPE  |  PDE  |  PTE  | offset
>   * The difference as compared to normal x86 3 level page table is the PDPEs are
>   * programmed via register.
> + *
> + * The x86 pagetable code is flexible in its ability to handle varying page
> + * table depths via abstracted PGDIR/PUD/PMD/PTE. I've opted to not do this and
> + * instead replicate the interesting functionality.
>   */
>  #define GEN8_PDPE_SHIFT			30
>  #define GEN8_PDPE_MASK			0x3
> @@ -79,10 +107,31 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>  #define GEN8_PTE_SHIFT			12
>  #define GEN8_PTE_MASK			0x1ff
>  
> -#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
> -#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
> -#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
> -#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
> +static inline uint32_t gen8_pte_index(uint64_t address)
> +{
> +	return (address >> GEN8_PTE_SHIFT) & GEN8_PTE_MASK;
> +}
> +
> +static inline uint32_t gen8_pde_index(uint64_t address)
> +{
> +	return (address >> GEN8_PDE_SHIFT) & GEN8_PDE_MASK;
> +}
> +
> +static inline uint32_t gen8_pdpe_index(uint64_t address)
> +{
> +	return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
> +}
> +
> +static inline uint32_t gen8_pml4e_index(uint64_t address)
> +{
> +	BUG();
> +}

Would be better to add this if/when it's first used.

> +
> +/* Useful for 3 level page table functions */
> +static inline uint32_t gen8_pde_count(uint64_t start, uint64_t length)
> +{
> +	return ((length - start) / PAGE_SIZE) << GEN8_PDE_SHIFT;
> +}

This isn't used anywhere either and has the <start,length> vs.
<start,end> issue Daniel pointed out elsewhere.

Otherwise the patch looks good and makes things indeed clearer, so with
the above two functions removed:

Reviewed-by: Imre Deak <imre.deak@intel.com>

>  
>  static void ppgtt_bind_vma(struct i915_vma *vma,
>  			   enum i915_cache_level cache_level,
> @@ -274,9 +323,9 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
>  	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
> -	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
> -	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
> -	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
> +	unsigned pdpe = gen8_pdpe_index(start);
> +	unsigned pde = gen8_pde_index(start);
> +	unsigned pte = gen8_pte_index(start);
>  	unsigned num_entries = (length - start) >> PAGE_SHIFT;
>  	unsigned last_pte, i;
>  
> @@ -315,9 +364,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
>  	gen8_gtt_pte_t *pt_vaddr;
> -	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
> -	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
> -	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
> +	unsigned pdpe = gen8_pdpe_index(start);
> +	unsigned pde = gen8_pde_index(start);
> +	unsigned pte = gen8_pte_index(start);
>  	struct sg_page_iter sg_iter;
>  
>  	pt_vaddr = NULL;
> @@ -661,9 +710,9 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>  		seq_printf(m, "\tPDE: %x\n", pd_entry);
>  
>  		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
> -		for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
> +		for (pte = 0; pte < GEN6_PPGTT_PT_ENTRIES; pte+=4) {
>  			unsigned long va =
> -				(pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
> +				(pde * PAGE_SIZE * GEN6_PPGTT_PT_ENTRIES) +
>  				(pte * PAGE_SIZE);
>  			int i;
>  			bool found = false;
> @@ -938,29 +987,28 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
>  	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
> -	unsigned first_entry = start >> PAGE_SHIFT;
> +	unsigned pde = gen6_pde_index(start);
>  	unsigned num_entries = (length - start) >> PAGE_SHIFT;
> -	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
> -	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> +	unsigned pte = gen6_pte_index(start);
>  	unsigned last_pte, i;
>  
>  	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
>  
>  	while (num_entries) {
> -		last_pte = first_pte + num_entries;
> -		if (last_pte > I915_PPGTT_PT_ENTRIES)
> -			last_pte = I915_PPGTT_PT_ENTRIES;
> +		last_pte = pte + num_entries;
> +		if (last_pte > GEN6_PPGTT_PT_ENTRIES)
> +			last_pte = GEN6_PPGTT_PT_ENTRIES;
>  
> -		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
> +		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
>  
> -		for (i = first_pte; i < last_pte; i++)
> +		for (i = pte; i < last_pte; i++)
>  			pt_vaddr[i] = scratch_pte;
>  
>  		kunmap_atomic(pt_vaddr);
>  
> -		num_entries -= last_pte - first_pte;
> -		first_pte = 0;
> -		act_pt++;
> +		num_entries -= last_pte - pte;
> +		pte = 0;
> +		pde++;
>  	}
>  }
>  
> @@ -972,24 +1020,23 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
>  	gen6_gtt_pte_t *pt_vaddr;
> -	unsigned first_entry = start >> PAGE_SHIFT;
> -	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
> -	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> +	unsigned pde = gen6_pde_index(start);
> +	unsigned pte = gen6_pte_index(start);
>  	struct sg_page_iter sg_iter;
>  
>  	pt_vaddr = NULL;
>  	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
>  		if (pt_vaddr == NULL)
> -			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
> +			pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
>  
> -		pt_vaddr[act_pte] =
> +		pt_vaddr[pte] =
>  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
>  				       cache_level, true);
> -		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
> +		if (++pte == GEN6_PPGTT_PT_ENTRIES) {
>  			kunmap_atomic(pt_vaddr);
>  			pt_vaddr = NULL;
> -			act_pt++;
> -			act_pte = 0;
> +			pde++;
> +			pte = 0;
>  		}
>  	}
>  	if (pt_vaddr)
> @@ -1173,7 +1220,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
>  	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
>  	ppgtt->base.start = 0;
> -	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
> +	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * GEN6_PPGTT_PT_ENTRIES * PAGE_SIZE;
>  	ppgtt->debug_dump = gen6_dump_ppgtt;
>  
>  	ppgtt->pd_offset =

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/i915: Page table helpers
  2014-03-10 21:05     ` Imre Deak
@ 2014-03-10 21:37       ` Ben Widawsky
  0 siblings, 0 replies; 7+ messages in thread
From: Ben Widawsky @ 2014-03-10 21:37 UTC (permalink / raw)
  To: Imre Deak; +Cc: Intel GFX, Ben Widawsky

On Mon, Mar 10, 2014 at 11:05:42PM +0200, Imre Deak wrote:
> On Tue, 2014-02-25 at 19:52 -0800, Ben Widawsky wrote:
> > These page table helpers make the code much cleaner. There is some
> > room to use the arch/x86 header files. The reason I've opted not to is
> > in several cases, the definitions are dictated by the CONFIG_ options
> > which do not always indicate the restrictions in the GPU.
> > 
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > ---
> > 
> > I have this patch queued up for the next round of /stuff/ I am working on. If
> > you want to pull it into this series, it's fine by me. As I deal with the code
> > more, it does become more obvious what looks good, and what does not.
> > 
> > ---
> > 
> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++++++++++++++++-----------
> >  1 file changed, 81 insertions(+), 34 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index aa3ef7f..43d9129 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -30,8 +30,6 @@
> >  #include "i915_trace.h"
> >  #include "intel_drv.h"
> >  
> > -#define GEN6_PPGTT_PD_ENTRIES 512
> > -#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> >  typedef uint64_t gen8_gtt_pte_t;
> >  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >  
> > @@ -51,6 +49,27 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >  #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
> >  #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
> >  
> > +/* GEN6 PPGTT resembles a 2 level page table:
> > + * 31:22 | 21:12 |  11:0
> > + *  PDE  |  PTE  | offset
> > + */
> > +#define GEN6_PDE_SHIFT			22
> > +#define GEN6_PPGTT_PD_ENTRIES		512
> > +#define GEN6_PDE_MASK			(GEN6_PPGTT_PD_ENTRIES-1)
> > +#define GEN6_PTE_SHIFT			12
> > +#define GEN6_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> > +#define GEN6_PTE_MASK			(GEN6_PPGTT_PT_ENTRIES-1)
> > +
> > +static inline uint32_t gen6_pte_index(uint32_t address)
> > +{
> > +	return (address >> GEN6_PTE_SHIFT) & GEN6_PTE_MASK;
> > +}
> > +
> > +static inline uint32_t gen6_pde_index(uint32_t address)
> > +{
> > +	return (address >> GEN6_PDE_SHIFT) & GEN6_PDE_MASK;
> > +}
> > +
> >  /* Cacheability Control is a 4-bit value. The low three bits are stored in *
> >   * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
> >   */
> > @@ -63,6 +82,11 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >  #define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
> >  #define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
> >  
> > +#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
> > +#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
> > +#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
> > +#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
> > +
> >  #define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
> >  #define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
> >  
> > @@ -71,6 +95,10 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >   * PDPE  |  PDE  |  PTE  | offset
> >   * The difference as compared to normal x86 3 level page table is the PDPEs are
> >   * programmed via register.
> > + *
> > + * The x86 pagetable code is flexible in its ability to handle varying page
> > + * table depths via abstracted PGDIR/PUD/PMD/PTE. I've opted to not do this and
> > + * instead replicate the interesting functionality.
> >   */
> >  #define GEN8_PDPE_SHIFT			30
> >  #define GEN8_PDPE_MASK			0x3
> > @@ -79,10 +107,31 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >  #define GEN8_PTE_SHIFT			12
> >  #define GEN8_PTE_MASK			0x1ff
> >  
> > -#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
> > -#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
> > -#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
> > -#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
> > +static inline uint32_t gen8_pte_index(uint64_t address)
> > +{
> > +	return (address >> GEN8_PTE_SHIFT) & GEN8_PTE_MASK;
> > +}
> > +
> > +static inline uint32_t gen8_pde_index(uint64_t address)
> > +{
> > +	return (address >> GEN8_PDE_SHIFT) & GEN8_PDE_MASK;
> > +}
> > +
> > +static inline uint32_t gen8_pdpe_index(uint64_t address)
> > +{
> > +	return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
> > +}
> > +
> > +static inline uint32_t gen8_pml4e_index(uint64_t address)
> > +{
> > +	BUG();
> > +}
> 
> Would be better to add this if/when it's first used.
> 
> > +
> > +/* Useful for 3 level page table functions */
> > +static inline uint32_t gen8_pde_count(uint64_t start, uint64_t length)
> > +{
> > +	return ((length - start) / PAGE_SIZE) << GEN8_PDE_SHIFT;
> > +}
> 
> This isn't used anywhere either and has the <start,length> vs.
> <start,end> issue Daniel pointed out elsewhere.
> 
> Otherwise the patch looks good and makes things indeed clearer, so with
> the above two functions removed:
> 
> Reviewed-by: Imre Deak <imre.deak@intel.com>
> 

I'll be posting some code soon of which this patch is a part of. I think
it makes everything a lot simpler, but there is one weird bug I still
need to resolve.

Thanks for looking at this. If you want to see dynamic-pt-pages in my
fdo repo, you can see where things are going.
http://cgit.freedesktop.org/~bwidawsk/drm-intel/log/?h=dynamic_pt_alloc

> >  
> >  static void ppgtt_bind_vma(struct i915_vma *vma,
> >  			   enum i915_cache_level cache_level,
> > @@ -274,9 +323,9 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
> >  	struct i915_hw_ppgtt *ppgtt =
> >  		container_of(vm, struct i915_hw_ppgtt, base);
> >  	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
> > -	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
> > -	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
> > -	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
> > +	unsigned pdpe = gen8_pdpe_index(start);
> > +	unsigned pde = gen8_pde_index(start);
> > +	unsigned pte = gen8_pte_index(start);
> >  	unsigned num_entries = (length - start) >> PAGE_SHIFT;
> >  	unsigned last_pte, i;
> >  
> > @@ -315,9 +364,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
> >  	struct i915_hw_ppgtt *ppgtt =
> >  		container_of(vm, struct i915_hw_ppgtt, base);
> >  	gen8_gtt_pte_t *pt_vaddr;
> > -	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
> > -	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
> > -	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
> > +	unsigned pdpe = gen8_pdpe_index(start);
> > +	unsigned pde = gen8_pde_index(start);
> > +	unsigned pte = gen8_pte_index(start);
> >  	struct sg_page_iter sg_iter;
> >  
> >  	pt_vaddr = NULL;
> > @@ -661,9 +710,9 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
> >  		seq_printf(m, "\tPDE: %x\n", pd_entry);
> >  
> >  		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
> > -		for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
> > +		for (pte = 0; pte < GEN6_PPGTT_PT_ENTRIES; pte+=4) {
> >  			unsigned long va =
> > -				(pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
> > +				(pde * PAGE_SIZE * GEN6_PPGTT_PT_ENTRIES) +
> >  				(pte * PAGE_SIZE);
> >  			int i;
> >  			bool found = false;
> > @@ -938,29 +987,28 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
> >  	struct i915_hw_ppgtt *ppgtt =
> >  		container_of(vm, struct i915_hw_ppgtt, base);
> >  	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
> > -	unsigned first_entry = start >> PAGE_SHIFT;
> > +	unsigned pde = gen6_pde_index(start);
> >  	unsigned num_entries = (length - start) >> PAGE_SHIFT;
> > -	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
> > -	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> > +	unsigned pte = gen6_pte_index(start);
> >  	unsigned last_pte, i;
> >  
> >  	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
> >  
> >  	while (num_entries) {
> > -		last_pte = first_pte + num_entries;
> > -		if (last_pte > I915_PPGTT_PT_ENTRIES)
> > -			last_pte = I915_PPGTT_PT_ENTRIES;
> > +		last_pte = pte + num_entries;
> > +		if (last_pte > GEN6_PPGTT_PT_ENTRIES)
> > +			last_pte = GEN6_PPGTT_PT_ENTRIES;
> >  
> > -		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
> > +		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
> >  
> > -		for (i = first_pte; i < last_pte; i++)
> > +		for (i = pte; i < last_pte; i++)
> >  			pt_vaddr[i] = scratch_pte;
> >  
> >  		kunmap_atomic(pt_vaddr);
> >  
> > -		num_entries -= last_pte - first_pte;
> > -		first_pte = 0;
> > -		act_pt++;
> > +		num_entries -= last_pte - pte;
> > +		pte = 0;
> > +		pde++;
> >  	}
> >  }
> >  
> > @@ -972,24 +1020,23 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
> >  	struct i915_hw_ppgtt *ppgtt =
> >  		container_of(vm, struct i915_hw_ppgtt, base);
> >  	gen6_gtt_pte_t *pt_vaddr;
> > -	unsigned first_entry = start >> PAGE_SHIFT;
> > -	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
> > -	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> > +	unsigned pde = gen6_pde_index(start);
> > +	unsigned pte = gen6_pte_index(start);
> >  	struct sg_page_iter sg_iter;
> >  
> >  	pt_vaddr = NULL;
> >  	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
> >  		if (pt_vaddr == NULL)
> > -			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
> > +			pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
> >  
> > -		pt_vaddr[act_pte] =
> > +		pt_vaddr[pte] =
> >  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
> >  				       cache_level, true);
> > -		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
> > +		if (++pte == GEN6_PPGTT_PT_ENTRIES) {
> >  			kunmap_atomic(pt_vaddr);
> >  			pt_vaddr = NULL;
> > -			act_pt++;
> > -			act_pte = 0;
> > +			pde++;
> > +			pte = 0;
> >  		}
> >  	}
> >  	if (pt_vaddr)
> > @@ -1173,7 +1220,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
> >  	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
> >  	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
> >  	ppgtt->base.start = 0;
> > -	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
> > +	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * GEN6_PPGTT_PT_ENTRIES * PAGE_SIZE;
> >  	ppgtt->debug_dump = gen6_dump_ppgtt;
> >  
> >  	ppgtt->pd_offset =
> 
> 

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2014-03-10 21:37 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <1392925833-6593-1-git-send-email-benjamin.widawsky>
2014-02-26  2:13 ` [PATCH 4/9] [v4] drm/i915: Make clear/insert vfuncs args absolute Ben Widawsky
2014-02-26  3:52   ` [PATCH] drm/i915: Page table helpers Ben Widawsky
2014-03-10 21:05     ` Imre Deak
2014-03-10 21:37       ` Ben Widawsky
2014-02-26  7:27   ` [PATCH 4/9] [v4] drm/i915: Make clear/insert vfuncs args absolute Ben Widawsky
2014-02-26  7:36     ` Ben Widawsky
2014-03-05 14:28       ` Daniel Vetter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox