* [PATCH v2 1/3] drm/i915: Remove unused "valid" parameter from pte_encode
@ 2016-10-07 13:17 Michał Winiarski
2016-10-07 13:17 ` [PATCH v2 2/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range Michał Winiarski
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Michał Winiarski @ 2016-10-07 13:17 UTC (permalink / raw)
To: intel-gfx
We never used any invalid ptes, those were put in place for
a possibility of doing gpu faults. However our batchbuffers are not
restricted in length, so everything needs to be pointing to something
and thus out-of-bounds is pointing to scratch.
Remove the valid flag as it is always true.
v2: Expand commit msg, patch reorder (Mika)
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michel Thierry <michel.thierry@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 6 +-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-
drivers/gpu/drm/i915/i915_gem_gtt.c | 98 ++++++++++++------------------
drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +-
4 files changed, 45 insertions(+), 67 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a89a889..60222b5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -919,8 +919,7 @@ out_unpin:
if (node.allocated) {
wmb();
ggtt->base.clear_range(&ggtt->base,
- node.start, node.size,
- true);
+ node.start, node.size);
i915_gem_object_unpin_pages(obj);
remove_mappable_node(&node);
} else {
@@ -1228,8 +1227,7 @@ out_unpin:
if (node.allocated) {
wmb();
ggtt->base.clear_range(&ggtt->base,
- node.start, node.size,
- true);
+ node.start, node.size);
i915_gem_object_unpin_pages(obj);
remove_mappable_node(&node);
} else {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 72c7c18..6835074 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -370,8 +370,7 @@ static void reloc_cache_fini(struct reloc_cache *cache)
ggtt->base.clear_range(&ggtt->base,
cache->node.start,
- cache->node.size,
- true);
+ cache->node.size);
drm_mm_remove_node(&cache->node);
} else {
i915_vma_unpin((struct i915_vma *)cache->node.mm);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0bb4232..08e2f35 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -191,15 +191,13 @@ static void ppgtt_unbind_vma(struct i915_vma *vma)
{
vma->vm->clear_range(vma->vm,
vma->node.start,
- vma->size,
- true);
+ vma->size);
}
static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
- bool valid)
+ enum i915_cache_level level)
{
- gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
+ gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW;
pte |= addr;
switch (level) {
@@ -234,9 +232,9 @@ static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
static gen6_pte_t snb_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
- bool valid, u32 unused)
+ u32 unused)
{
- gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
+ gen6_pte_t pte = GEN6_PTE_VALID;
pte |= GEN6_PTE_ADDR_ENCODE(addr);
switch (level) {
@@ -256,9 +254,9 @@ static gen6_pte_t snb_pte_encode(dma_addr_t addr,
static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
- bool valid, u32 unused)
+ u32 unused)
{
- gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
+ gen6_pte_t pte = GEN6_PTE_VALID;
pte |= GEN6_PTE_ADDR_ENCODE(addr);
switch (level) {
@@ -280,9 +278,9 @@ static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
static gen6_pte_t byt_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
- bool valid, u32 flags)
+ u32 flags)
{
- gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
+ gen6_pte_t pte = GEN6_PTE_VALID;
pte |= GEN6_PTE_ADDR_ENCODE(addr);
if (!(flags & PTE_READ_ONLY))
@@ -296,9 +294,9 @@ static gen6_pte_t byt_pte_encode(dma_addr_t addr,
static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
- bool valid, u32 unused)
+ u32 unused)
{
- gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
+ gen6_pte_t pte = GEN6_PTE_VALID;
pte |= HSW_PTE_ADDR_ENCODE(addr);
if (level != I915_CACHE_NONE)
@@ -309,9 +307,9 @@ static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
static gen6_pte_t iris_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
- bool valid, u32 unused)
+ u32 unused)
{
- gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
+ gen6_pte_t pte = GEN6_PTE_VALID;
pte |= HSW_PTE_ADDR_ENCODE(addr);
switch (level) {
@@ -472,7 +470,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm,
gen8_pte_t scratch_pte;
scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
- I915_CACHE_LLC, true);
+ I915_CACHE_LLC);
fill_px(vm->dev, pt, scratch_pte);
}
@@ -485,7 +483,7 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
WARN_ON(vm->scratch_page.daddr == 0);
scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
- I915_CACHE_LLC, true, 0);
+ I915_CACHE_LLC, 0);
fill32_px(vm->dev, pt, scratch_pte);
}
@@ -763,13 +761,11 @@ static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
}
static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
- uint64_t start,
- uint64_t length,
- bool use_scratch)
+ uint64_t start, uint64_t length)
{
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
- I915_CACHE_LLC, use_scratch);
+ I915_CACHE_LLC);
if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
@@ -809,7 +805,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
pt_vaddr[pte] =
gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
- cache_level, true);
+ cache_level);
if (++pte == GEN8_PTES) {
kunmap_px(ppgtt, pt_vaddr);
pt_vaddr = NULL;
@@ -1452,7 +1448,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
uint64_t start = ppgtt->base.start;
uint64_t length = ppgtt->base.total;
gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
- I915_CACHE_LLC, true);
+ I915_CACHE_LLC);
if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
@@ -1569,7 +1565,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
- I915_CACHE_LLC, true, 0);
+ I915_CACHE_LLC, 0);
gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
u32 expected;
@@ -1782,8 +1778,7 @@ static void gen6_ppgtt_enable(struct drm_device *dev)
/* PPGTT support for Sandybdrige/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
uint64_t start,
- uint64_t length,
- bool use_scratch)
+ uint64_t length)
{
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
gen6_pte_t *pt_vaddr, scratch_pte;
@@ -1794,7 +1789,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
unsigned last_pte, i;
scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
- I915_CACHE_LLC, true, 0);
+ I915_CACHE_LLC, 0);
while (num_entries) {
last_pte = first_pte + num_entries;
@@ -1832,7 +1827,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
pt_vaddr[act_pte] =
- vm->pte_encode(addr, cache_level, true, flags);
+ vm->pte_encode(addr, cache_level, flags);
if (++act_pte == GEN6_PTES) {
kunmap_px(ppgtt, pt_vaddr);
@@ -2286,8 +2281,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
i915_check_and_clear_faults(dev_priv);
- ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
- true);
+ ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total);
i915_ggtt_flush(dev_priv);
}
@@ -2321,7 +2315,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
- gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
+ gen8_set_pte(pte, gen8_pte_encode(addr, level));
I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
POSTING_READ(GFX_FLSH_CNTL_GEN6);
@@ -2348,7 +2342,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
for_each_sgt_dma(addr, sgt_iter, st) {
- gtt_entry = gen8_pte_encode(addr, level, true);
+ gtt_entry = gen8_pte_encode(addr, level);
gen8_set_pte(&gtt_entries[i++], gtt_entry);
}
@@ -2412,7 +2406,7 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm,
rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
- iowrite32(vm->pte_encode(addr, level, true, flags), pte);
+ iowrite32(vm->pte_encode(addr, level, flags), pte);
I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
POSTING_READ(GFX_FLSH_CNTL_GEN6);
@@ -2445,7 +2439,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
for_each_sgt_dma(addr, sgt_iter, st) {
- gtt_entry = vm->pte_encode(addr, level, true, flags);
+ gtt_entry = vm->pte_encode(addr, level, flags);
iowrite32(gtt_entry, &gtt_entries[i++]);
}
@@ -2469,16 +2463,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
}
static void nop_clear_range(struct i915_address_space *vm,
- uint64_t start,
- uint64_t length,
- bool use_scratch)
+ uint64_t start, uint64_t length)
{
}
static void gen8_ggtt_clear_range(struct i915_address_space *vm,
- uint64_t start,
- uint64_t length,
- bool use_scratch)
+ uint64_t start, uint64_t length)
{
struct drm_i915_private *dev_priv = to_i915(vm->dev);
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
@@ -2498,8 +2488,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
num_entries = max_entries;
scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
- I915_CACHE_LLC,
- use_scratch);
+ I915_CACHE_LLC);
for (i = 0; i < num_entries; i++)
gen8_set_pte(>t_base[i], scratch_pte);
readl(gtt_base);
@@ -2509,8 +2498,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
static void gen6_ggtt_clear_range(struct i915_address_space *vm,
uint64_t start,
- uint64_t length,
- bool use_scratch)
+ uint64_t length)
{
struct drm_i915_private *dev_priv = to_i915(vm->dev);
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
@@ -2530,7 +2518,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
num_entries = max_entries;
scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
- I915_CACHE_LLC, use_scratch, 0);
+ I915_CACHE_LLC, 0);
for (i = 0; i < num_entries; i++)
iowrite32(scratch_pte, &gtt_base[i]);
@@ -2577,8 +2565,7 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm,
static void i915_ggtt_clear_range(struct i915_address_space *vm,
uint64_t start,
- uint64_t length,
- bool unused)
+ uint64_t length)
{
struct drm_i915_private *dev_priv = to_i915(vm->dev);
unsigned first_entry = start >> PAGE_SHIFT;
@@ -2662,13 +2649,11 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
if (vma->flags & I915_VMA_GLOBAL_BIND)
vma->vm->clear_range(vma->vm,
- vma->node.start, size,
- true);
+ vma->node.start, size);
if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
appgtt->base.clear_range(&appgtt->base,
- vma->node.start, size,
- true);
+ vma->node.start, size);
}
void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
@@ -2729,13 +2714,12 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
hole_start, hole_end);
ggtt->base.clear_range(&ggtt->base, hole_start,
- hole_end - hole_start, true);
+ hole_end - hole_start);
}
/* And finally clear the reserved guard page */
ggtt->base.clear_range(&ggtt->base,
- ggtt->base.total - PAGE_SIZE, PAGE_SIZE,
- true);
+ ggtt->base.total - PAGE_SIZE, PAGE_SIZE);
if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
struct i915_hw_ppgtt *ppgtt;
@@ -2761,8 +2745,7 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
ppgtt->base.clear_range(&ppgtt->base,
ppgtt->base.start,
- ppgtt->base.total,
- true);
+ ppgtt->base.total);
dev_priv->mm.aliasing_ppgtt = ppgtt;
WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
@@ -3237,8 +3220,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
i915_check_and_clear_faults(dev_priv);
/* First fill our portion of the GTT with scratch pages */
- ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
- true);
+ ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total);
ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index ec78be2..931d712 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -395,7 +395,7 @@ struct i915_address_space {
/* FIXME: Need a more generic return type */
gen6_pte_t (*pte_encode)(dma_addr_t addr,
enum i915_cache_level level,
- bool valid, u32 flags); /* Create a valid PTE */
+ u32 flags); /* Create a valid PTE */
/* flags for pte_encode */
#define PTE_READ_ONLY (1<<0)
int (*allocate_va_range)(struct i915_address_space *vm,
@@ -403,8 +403,7 @@ struct i915_address_space {
uint64_t length);
void (*clear_range)(struct i915_address_space *vm,
uint64_t start,
- uint64_t length,
- bool use_scratch);
+ uint64_t length);
void (*insert_page)(struct i915_address_space *vm,
dma_addr_t addr,
uint64_t offset,
--
2.7.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v2 2/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range
2016-10-07 13:17 [PATCH v2 1/3] drm/i915: Remove unused "valid" parameter from pte_encode Michał Winiarski
@ 2016-10-07 13:17 ` Michał Winiarski
2016-10-10 13:12 ` Mika Kuoppala
2016-10-10 13:37 ` Joonas Lahtinen
2016-10-07 13:17 ` [PATCH v2 3/3] drm/i915/gtt: Free unused lower-level page tables Michał Winiarski
2016-10-10 12:14 ` ✗ Fi.CI.BAT: warning for series starting with [v2,1/3] drm/i915: Remove unused "valid" parameter from pte_encode (rev2) Patchwork
2 siblings, 2 replies; 9+ messages in thread
From: Michał Winiarski @ 2016-10-07 13:17 UTC (permalink / raw)
To: intel-gfx; +Cc: Mika Kuoppala
Let's use more top-down approach, where each gen8_ppgtt_clear_* function
is responsible for clearing the struct passed as an argument and calling
relevant clear_range functions on lower-level tables.
Doing this rather than operating on PTE ranges makes the implementation
of shrinking page tables quite simple.
v2: Drop min when calculating num_entries, no negation in 48b ppgtt
check, no newlines in vars block (Joonas)
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 107 +++++++++++++++++++-----------------
1 file changed, 58 insertions(+), 49 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 08e2f35..adabf58 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -704,59 +704,78 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
}
-static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
- struct i915_page_directory_pointer *pdp,
- uint64_t start,
- uint64_t length,
- gen8_pte_t scratch_pte)
+static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
+ struct i915_page_table *pt,
+ uint64_t start,
+ uint64_t length)
{
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
+ unsigned int pte_start = gen8_pte_index(start);
+ unsigned int num_entries = gen8_pte_count(start, length);
+ uint64_t pte;
gen8_pte_t *pt_vaddr;
- unsigned pdpe = gen8_pdpe_index(start);
- unsigned pde = gen8_pde_index(start);
- unsigned pte = gen8_pte_index(start);
- unsigned num_entries = length >> PAGE_SHIFT;
- unsigned last_pte, i;
+ gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
+ I915_CACHE_LLC);
- if (WARN_ON(!pdp))
+ if (WARN_ON(!px_page(pt)))
return;
- while (num_entries) {
- struct i915_page_directory *pd;
- struct i915_page_table *pt;
+ bitmap_clear(pt->used_ptes, pte_start, num_entries);
- if (WARN_ON(!pdp->page_directory[pdpe]))
- break;
+ pt_vaddr = kmap_px(pt);
+
+ for (pte = pte_start; pte < num_entries; pte++)
+ pt_vaddr[pte] = scratch_pte;
- pd = pdp->page_directory[pdpe];
+ kunmap_px(ppgtt, pt_vaddr);
+}
+
+static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
+ struct i915_page_directory *pd,
+ uint64_t start,
+ uint64_t length)
+{
+ struct i915_page_table *pt;
+ uint64_t pde;
+ gen8_for_each_pde(pt, pd, start, length, pde) {
if (WARN_ON(!pd->page_table[pde]))
break;
- pt = pd->page_table[pde];
+ gen8_ppgtt_clear_pt(vm, pt, start, length);
+ }
+}
- if (WARN_ON(!px_page(pt)))
- break;
+static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
+ struct i915_page_directory_pointer *pdp,
+ uint64_t start,
+ uint64_t length)
+{
+ struct i915_page_directory *pd;
+ uint64_t pdpe;
- last_pte = pte + num_entries;
- if (last_pte > GEN8_PTES)
- last_pte = GEN8_PTES;
+ gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
+ if (WARN_ON(!pdp->page_directory[pdpe]))
+ break;
- pt_vaddr = kmap_px(pt);
+ gen8_ppgtt_clear_pd(vm, pd, start, length);
+ }
+}
- for (i = pte; i < last_pte; i++) {
- pt_vaddr[i] = scratch_pte;
- num_entries--;
- }
+static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
+ struct i915_pml4 *pml4,
+ uint64_t start,
+ uint64_t length)
+{
+ struct i915_page_directory_pointer *pdp;
+ uint64_t pml4e;
- kunmap_px(ppgtt, pt_vaddr);
+ gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
+ if (WARN_ON(!pml4->pdps[pml4e]))
+ break;
- pte = 0;
- if (++pde == I915_PDES) {
- if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
- break;
- pde = 0;
- }
+ gen8_ppgtt_clear_pdp(vm, pdp, start, length);
}
}
@@ -764,21 +783,11 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
uint64_t start, uint64_t length)
{
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
- I915_CACHE_LLC);
- if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
- gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
- scratch_pte);
- } else {
- uint64_t pml4e;
- struct i915_page_directory_pointer *pdp;
-
- gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
- gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
- scratch_pte);
- }
- }
+ if (USES_FULL_48BIT_PPGTT(vm->dev))
+ gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length);
+ else
+ gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length);
}
static void
--
2.7.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v2 3/3] drm/i915/gtt: Free unused lower-level page tables
2016-10-07 13:17 [PATCH v2 1/3] drm/i915: Remove unused "valid" parameter from pte_encode Michał Winiarski
2016-10-07 13:17 ` [PATCH v2 2/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range Michał Winiarski
@ 2016-10-07 13:17 ` Michał Winiarski
2016-10-07 18:39 ` [PATCH v3 " Michał Winiarski
2016-10-10 12:14 ` ✗ Fi.CI.BAT: warning for series starting with [v2,1/3] drm/i915: Remove unused "valid" parameter from pte_encode (rev2) Patchwork
2 siblings, 1 reply; 9+ messages in thread
From: Michał Winiarski @ 2016-10-07 13:17 UTC (permalink / raw)
To: intel-gfx; +Cc: Mika Kuoppala
Since "Dynamic page table allocations" were introduced, our page tables
can grow (being dynamically allocated) with address space range usage.
Unfortunately, their lifetime is bound to vm. This is not a huge problem
when we're not using softpin - drm_mm is creating an upper bound on used
range by causing addresses for our VMAs to eventually be reused.
With softpin, long lived contexts can drain the system out of memory
even with a single "small" object. For example:
bo = bo_alloc(size);
while(true)
    offset += size;
    exec(bo, offset);
Will cause us to create new allocations until all memory in the system
is used for tracking GPU pages (even though almost all PTEs in this vm
are pointing to scratch).
Let's free unused page tables in clear_range to prevent this - if no
entries are used, we can safely free it and return this information to
the caller (so that higher-level entry is pointing to scratch).
v2: Document return value and free semantics (Joonas)
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 89 ++++++++++++++++++++++++++++++++++---
1 file changed, 82 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index adabf58..7850094 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -704,7 +704,9 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
}
-static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
+/* Removes entries from a single page table, releasing it if it's empty.
+ * Caller can use the return value to update higher-level entries */
+static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
struct i915_page_table *pt,
uint64_t start,
uint64_t length)
@@ -719,63 +721,136 @@ static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
I915_CACHE_LLC);
if (WARN_ON(!px_page(pt)))
- return;
+ return false;
bitmap_clear(pt->used_ptes, pte_start, num_entries);
+ if (bitmap_empty(pt->used_ptes, GEN8_PTES)) {
+ free_pt(vm->dev, pt);
+ return true;
+ }
+
pt_vaddr = kmap_px(pt);
for (pte = pte_start; pte < num_entries; pte++)
pt_vaddr[pte] = scratch_pte;
kunmap_px(ppgtt, pt_vaddr);
+
+ return false;
}
-static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
+/* Removes entries from a single page dir, releasing it if it's empty.
+ * Caller can use the return value to update higher-level entries
+ */
+static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
struct i915_page_directory *pd,
uint64_t start,
uint64_t length)
{
+ struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
struct i915_page_table *pt;
uint64_t pde;
+ gen8_pde_t *pde_vaddr;
+ gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
+ I915_CACHE_LLC);
+ bool reduce;
+
gen8_for_each_pde(pt, pd, start, length, pde) {
if (WARN_ON(!pd->page_table[pde]))
break;
- gen8_ppgtt_clear_pt(vm, pt, start, length);
+ reduce = gen8_ppgtt_clear_pt(vm, pt, start, length);
+
+ if (reduce) {
+ __clear_bit(pde, pd->used_pdes);
+ pde_vaddr = kmap_px(pd);
+ pde_vaddr[pde] = scratch_pde;
+ kunmap_px(ppgtt, pde_vaddr);
+ }
}
+
+ if (bitmap_empty(pd->used_pdes, I915_PDES)) {
+ free_pd(vm->dev, pd);
+ return true;
+ }
+
+ return false;
}
-static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
+/* Removes entries from a single page dir pointer, releasing it if it's empty.
+ * Caller can use the return value to update higher-level entries
+ */
+static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
struct i915_page_directory_pointer *pdp,
uint64_t start,
uint64_t length)
{
+ struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
struct i915_page_directory *pd;
uint64_t pdpe;
+ gen8_ppgtt_pdpe_t *pdpe_vaddr;
+ gen8_ppgtt_pdpe_t scratch_pdpe =
+ gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
+ bool reduce;
+
gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
if (WARN_ON(!pdp->page_directory[pdpe]))
break;
- gen8_ppgtt_clear_pd(vm, pd, start, length);
+ reduce = gen8_ppgtt_clear_pd(vm, pd, start, length);
+
+ if (reduce) {
+ __clear_bit(pdpe, pdp->used_pdpes);
+ pdpe_vaddr = kmap_px(pdp);
+ pdpe_vaddr[pdpe] = scratch_pdpe;
+ kunmap_px(ppgtt, pdpe_vaddr);
+ }
+ }
+
+ if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(vm->dev))) {
+ free_pdp(vm->dev, pdp);
+ return true;
}
+
+ return false;
}
+/* Removes entries from a single pml4.
+ * This is the top-level structure in 4-level page tables used on gen8+.
+ * Empty entries are always scratch pml4e.
+ */
static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
struct i915_pml4 *pml4,
uint64_t start,
uint64_t length)
{
+ struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
struct i915_page_directory_pointer *pdp;
uint64_t pml4e;
+ gen8_ppgtt_pml4e_t *pml4e_vaddr;
+ gen8_ppgtt_pml4e_t scratch_pml4e =
+ gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC);
+ bool reduce;
+
gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
if (WARN_ON(!pml4->pdps[pml4e]))
break;
- gen8_ppgtt_clear_pdp(vm, pdp, start, length);
+ reduce = gen8_ppgtt_clear_pdp(vm, pdp, start, length);
+
+ if (reduce) {
+ __clear_bit(pml4e, pml4->used_pml4es);
+ pml4e_vaddr = kmap_px(pml4);
+ pml4e_vaddr[pml4e] = scratch_pml4e;
+ kunmap_px(ppgtt, pml4e_vaddr);
+ }
}
}
--
2.7.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v3 3/3] drm/i915/gtt: Free unused lower-level page tables
2016-10-07 13:17 ` [PATCH v2 3/3] drm/i915/gtt: Free unused lower-level page tables Michał Winiarski
@ 2016-10-07 18:39 ` Michał Winiarski
2016-10-10 12:26 ` Chris Wilson
2016-10-10 13:39 ` Joonas Lahtinen
0 siblings, 2 replies; 9+ messages in thread
From: Michał Winiarski @ 2016-10-07 18:39 UTC (permalink / raw)
To: intel-gfx; +Cc: Mika Kuoppala
Since "Dynamic page table allocations" were introduced, our page tables
can grow (being dynamically allocated) with address space range usage.
Unfortunately, their lifetime is bound to vm. This is not a huge problem
when we're not using softpin - drm_mm is creating an upper bound on used
range by causing addresses for our VMAs to eventually be reused.
With softpin, long lived contexts can drain the system out of memory
even with a single "small" object. For example:
bo = bo_alloc(size);
while(true)
    offset += size;
    exec(bo, offset);
Will cause us to create new allocations until all memory in the system
is used for tracking GPU pages (even though almost all PTEs in this vm
are pointing to scratch).
Let's free unused page tables in clear_range to prevent this - if no
entries are used, we can safely free it and return this information to
the caller (so that higher-level entry is pointing to scratch).
v2: Document return value and free semantics (Joonas)
v3: No newlines in vars block (Joonas)
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 84 +++++++++++++++++++++++++++++++++----
1 file changed, 76 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index adabf58..0e096b2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -704,13 +704,14 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
}
-static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
+/* Removes entries from a single page table, releasing it if it's empty.
+ * Caller can use the return value to update higher-level entries */
+static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
struct i915_page_table *pt,
uint64_t start,
uint64_t length)
{
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-
unsigned int pte_start = gen8_pte_index(start);
unsigned int num_entries = gen8_pte_count(start, length);
uint64_t pte;
@@ -719,63 +720,130 @@ static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
I915_CACHE_LLC);
if (WARN_ON(!px_page(pt)))
- return;
+ return false;
bitmap_clear(pt->used_ptes, pte_start, num_entries);
+ if (bitmap_empty(pt->used_ptes, GEN8_PTES)) {
+ free_pt(vm->dev, pt);
+ return true;
+ }
+
pt_vaddr = kmap_px(pt);
for (pte = pte_start; pte < num_entries; pte++)
pt_vaddr[pte] = scratch_pte;
kunmap_px(ppgtt, pt_vaddr);
+
+ return false;
}
-static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
+/* Removes entries from a single page dir, releasing it if it's empty.
+ * Caller can use the return value to update higher-level entries
+ */
+static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
struct i915_page_directory *pd,
uint64_t start,
uint64_t length)
{
+ struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
struct i915_page_table *pt;
uint64_t pde;
+ gen8_pde_t *pde_vaddr;
+ gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
+ I915_CACHE_LLC);
+ bool reduce;
gen8_for_each_pde(pt, pd, start, length, pde) {
if (WARN_ON(!pd->page_table[pde]))
break;
- gen8_ppgtt_clear_pt(vm, pt, start, length);
+ reduce = gen8_ppgtt_clear_pt(vm, pt, start, length);
+
+ if (reduce) {
+ __clear_bit(pde, pd->used_pdes);
+ pde_vaddr = kmap_px(pd);
+ pde_vaddr[pde] = scratch_pde;
+ kunmap_px(ppgtt, pde_vaddr);
+ }
+ }
+
+ if (bitmap_empty(pd->used_pdes, I915_PDES)) {
+ free_pd(vm->dev, pd);
+ return true;
}
+
+ return false;
}
-static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
+/* Removes entries from a single page dir pointer, releasing it if it's empty.
+ * Caller can use the return value to update higher-level entries
+ */
+static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
struct i915_page_directory_pointer *pdp,
uint64_t start,
uint64_t length)
{
+ struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
struct i915_page_directory *pd;
uint64_t pdpe;
+ gen8_ppgtt_pdpe_t *pdpe_vaddr;
+ gen8_ppgtt_pdpe_t scratch_pdpe =
+ gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
+ bool reduce;
gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
if (WARN_ON(!pdp->page_directory[pdpe]))
break;
- gen8_ppgtt_clear_pd(vm, pd, start, length);
+ reduce = gen8_ppgtt_clear_pd(vm, pd, start, length);
+
+ if (reduce) {
+ __clear_bit(pdpe, pdp->used_pdpes);
+ pdpe_vaddr = kmap_px(pdp);
+ pdpe_vaddr[pdpe] = scratch_pdpe;
+ kunmap_px(ppgtt, pdpe_vaddr);
+ }
+ }
+
+ if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(vm->dev))) {
+ free_pdp(vm->dev, pdp);
+ return true;
}
+
+ return false;
}
+/* Removes entries from a single pml4.
+ * This is the top-level structure in 4-level page tables used on gen8+.
+ * Empty entries are always scratch pml4e.
+ */
static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
struct i915_pml4 *pml4,
uint64_t start,
uint64_t length)
{
+ struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
struct i915_page_directory_pointer *pdp;
uint64_t pml4e;
+ gen8_ppgtt_pml4e_t *pml4e_vaddr;
+ gen8_ppgtt_pml4e_t scratch_pml4e =
+ gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC);
+ bool reduce;
gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
if (WARN_ON(!pml4->pdps[pml4e]))
break;
- gen8_ppgtt_clear_pdp(vm, pdp, start, length);
+ reduce = gen8_ppgtt_clear_pdp(vm, pdp, start, length);
+
+ if (reduce) {
+ __clear_bit(pml4e, pml4->used_pml4es);
+ pml4e_vaddr = kmap_px(pml4);
+ pml4e_vaddr[pml4e] = scratch_pml4e;
+ kunmap_px(ppgtt, pml4e_vaddr);
+ }
}
}
--
2.7.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread
* ✗ Fi.CI.BAT: warning for series starting with [v2,1/3] drm/i915: Remove unused "valid" parameter from pte_encode (rev2)
2016-10-07 13:17 [PATCH v2 1/3] drm/i915: Remove unused "valid" parameter from pte_encode Michał Winiarski
2016-10-07 13:17 ` [PATCH v2 2/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range Michał Winiarski
2016-10-07 13:17 ` [PATCH v2 3/3] drm/i915/gtt: Free unused lower-level page tables Michał Winiarski
@ 2016-10-10 12:14 ` Patchwork
2 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2016-10-10 12:14 UTC (permalink / raw)
To: Michał Winiarski; +Cc: intel-gfx
== Series Details ==
Series: series starting with [v2,1/3] drm/i915: Remove unused "valid" parameter from pte_encode (rev2)
URL : https://patchwork.freedesktop.org/series/13444/
State : warning
== Summary ==
Series 13444v2 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/13444/revisions/2/mbox/
Test drv_module_reload_basic:
pass -> SKIP (fi-skl-6260u)
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-a:
pass -> DMESG-WARN (fi-byt-j1900)
Test vgem_basic:
Subgroup unload:
pass -> SKIP (fi-skl-6770hq)
pass -> SKIP (fi-skl-6260u)
pass -> SKIP (fi-bxt-t5700)
pass -> SKIP (fi-skl-6700hq)
fi-bdw-5557u total:248 pass:231 dwarn:0 dfail:0 fail:0 skip:17
fi-bxt-t5700 total:248 pass:216 dwarn:0 dfail:0 fail:0 skip:32
fi-byt-j1900 total:248 pass:213 dwarn:2 dfail:0 fail:1 skip:32
fi-byt-n2820 total:248 pass:210 dwarn:0 dfail:0 fail:1 skip:37
fi-hsw-4770 total:248 pass:224 dwarn:0 dfail:0 fail:0 skip:24
fi-hsw-4770r total:248 pass:224 dwarn:0 dfail:0 fail:0 skip:24
fi-ivb-3520m total:248 pass:221 dwarn:0 dfail:0 fail:0 skip:27
fi-ivb-3770 total:248 pass:207 dwarn:0 dfail:0 fail:0 skip:41
fi-kbl-7200u total:248 pass:222 dwarn:0 dfail:0 fail:0 skip:26
fi-skl-6260u total:248 pass:231 dwarn:0 dfail:0 fail:0 skip:17
fi-skl-6700hq total:248 pass:223 dwarn:1 dfail:0 fail:0 skip:24
fi-skl-6700k total:248 pass:221 dwarn:1 dfail:0 fail:0 skip:26
fi-skl-6770hq total:248 pass:230 dwarn:1 dfail:0 fail:1 skip:16
fi-snb-2520m total:248 pass:211 dwarn:0 dfail:0 fail:0 skip:37
fi-snb-2600 total:248 pass:209 dwarn:0 dfail:0 fail:0 skip:39
Results at /archive/results/CI_IGT_test/Patchwork_2651/
f35ed31aea66b3230c366fcba5f3456ae2cb956e drm-intel-nightly: 2016y-10m-10d-11h-28m-51s UTC integration manifest
b3114dc drm/i915/gtt: Free unused lower-level page tables
a7480f3 drm/i915/gtt: Split gen8_ppgtt_clear_pte_range
c151f15 drm/i915: Remove unused "valid" parameter from pte_encode
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v3 3/3] drm/i915/gtt: Free unused lower-level page tables
2016-10-07 18:39 ` [PATCH v3 " Michał Winiarski
@ 2016-10-10 12:26 ` Chris Wilson
2016-10-10 13:39 ` Joonas Lahtinen
1 sibling, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2016-10-10 12:26 UTC (permalink / raw)
To: Michał Winiarski; +Cc: intel-gfx, Mika Kuoppala
On Fri, Oct 07, 2016 at 08:39:22PM +0200, Michał Winiarski wrote:
> Since "Dynamic page table allocations" were introduced, our page tables
> can grow (being dynamically allocated) with address space range usage.
> Unfortunately, their lifetime is bound to vm. This is not a huge problem
> when we're not using softpin - drm_mm is creating an upper bound on used
> range by causing addresses for our VMAs to eventually be reused.
>
> With softpin, long lived contexts can drain the system out of memory
> even with a single "small" object. For example:
>
> bo = bo_alloc(size);
> while(true)
> offset += size;
> exec(bo, offset);
>
> Will cause us to create new allocations until all memory in the system
> is used for tracking GPU pages (even though almost all PTEs in this vm
> are pointing to scratch).
>
> Let's free unused page tables in clear_range to prevent this - if no
> entries are used, we can safely free it and return this information to
> the caller (so that higher-level entry is pointing to scratch).
>
> v2: Document return value and free semantics (Joonas)
> v3: No newlines in vars block (Joonas)
>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Aside from not liking the bitmaps at all (yay for the extra memory
pressure for no purpose),
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
(and for the earlier patches)
> gen8_for_each_pde(pt, pd, start, length, pde) {
> if (WARN_ON(!pd->page_table[pde]))
> break;
>
> - gen8_ppgtt_clear_pt(vm, pt, start, length);
> + reduce = gen8_ppgtt_clear_pt(vm, pt, start, length);
> +
> + if (reduce) {
I would not have put a newline here as the if() is coupled to the
clear_pte() (and I would have just used if (clear_pte())).
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 2/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range
2016-10-07 13:17 ` [PATCH v2 2/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range Michał Winiarski
@ 2016-10-10 13:12 ` Mika Kuoppala
2016-10-10 13:37 ` Joonas Lahtinen
1 sibling, 0 replies; 9+ messages in thread
From: Mika Kuoppala @ 2016-10-10 13:12 UTC (permalink / raw)
To: Michał Winiarski, intel-gfx
Michał Winiarski <michal.winiarski@intel.com> writes:
> Let's use more top-down approach, where each gen8_ppgtt_clear_* function
> is responsible for clearing the struct passed as an argument and calling
> relevant clear_range functions on lower-level tables.
> Doing this rather than operating on PTE ranges makes the implementation
> of shrinking page tables quite simple.
>
> v2: Drop min when calculating num_entries, no negation in 48b ppgtt
> check, no newlines in vars block (Joonas)
>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_gtt.c | 107 +++++++++++++++++++-----------------
> 1 file changed, 58 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 08e2f35..adabf58 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -704,59 +704,78 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
> return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
> }
>
> -static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
> - struct i915_page_directory_pointer *pdp,
> - uint64_t start,
> - uint64_t length,
> - gen8_pte_t scratch_pte)
> +static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
> + struct i915_page_table *pt,
> + uint64_t start,
> + uint64_t length)
> {
> struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> +
> + unsigned int pte_start = gen8_pte_index(start);
> + unsigned int num_entries = gen8_pte_count(start, length);
> + uint64_t pte;
> gen8_pte_t *pt_vaddr;
> - unsigned pdpe = gen8_pdpe_index(start);
> - unsigned pde = gen8_pde_index(start);
> - unsigned pte = gen8_pte_index(start);
> - unsigned num_entries = length >> PAGE_SHIFT;
> - unsigned last_pte, i;
> + gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
> + I915_CACHE_LLC);
>
> - if (WARN_ON(!pdp))
> + if (WARN_ON(!px_page(pt)))
> return;
>
> - while (num_entries) {
> - struct i915_page_directory *pd;
> - struct i915_page_table *pt;
> + bitmap_clear(pt->used_ptes, pte_start, num_entries);
>
> - if (WARN_ON(!pdp->page_directory[pdpe]))
> - break;
> + pt_vaddr = kmap_px(pt);
> +
> + for (pte = pte_start; pte < num_entries; pte++)
> + pt_vaddr[pte] = scratch_pte;
>
> - pd = pdp->page_directory[pdpe];
> + kunmap_px(ppgtt, pt_vaddr);
> +}
> +
> +static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
> + struct i915_page_directory *pd,
> + uint64_t start,
> + uint64_t length)
> +{
> + struct i915_page_table *pt;
> + uint64_t pde;
>
> + gen8_for_each_pde(pt, pd, start, length, pde) {
> if (WARN_ON(!pd->page_table[pde]))
> break;
>
> - pt = pd->page_table[pde];
> + gen8_ppgtt_clear_pt(vm, pt, start, length);
> + }
> +}
>
> - if (WARN_ON(!px_page(pt)))
> - break;
> +static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
> + struct i915_page_directory_pointer *pdp,
> + uint64_t start,
> + uint64_t length)
> +{
> + struct i915_page_directory *pd;
> + uint64_t pdpe;
>
> - last_pte = pte + num_entries;
> - if (last_pte > GEN8_PTES)
> - last_pte = GEN8_PTES;
> + gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> + if (WARN_ON(!pdp->page_directory[pdpe]))
> + break;
>
> - pt_vaddr = kmap_px(pt);
> + gen8_ppgtt_clear_pd(vm, pd, start, length);
> + }
> +}
>
> - for (i = pte; i < last_pte; i++) {
> - pt_vaddr[i] = scratch_pte;
> - num_entries--;
> - }
> +static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
> + struct i915_pml4 *pml4,
> + uint64_t start,
> + uint64_t length)
> +{
> + struct i915_page_directory_pointer *pdp;
> + uint64_t pml4e;
>
> - kunmap_px(ppgtt, pt_vaddr);
> + gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> + if (WARN_ON(!pml4->pdps[pml4e]))
> + break;
>
> - pte = 0;
> - if (++pde == I915_PDES) {
> - if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
> - break;
> - pde = 0;
> - }
> + gen8_ppgtt_clear_pdp(vm, pdp, start, length);
> }
> }
>
> @@ -764,21 +783,11 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
> uint64_t start, uint64_t length)
> {
> struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> - gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
> - I915_CACHE_LLC);
>
> - if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
> - gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
> - scratch_pte);
> - } else {
> - uint64_t pml4e;
> - struct i915_page_directory_pointer *pdp;
> -
> - gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
> - gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
> - scratch_pte);
> - }
> - }
> + if (USES_FULL_48BIT_PPGTT(vm->dev))
> + gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length);
> + else
> + gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length);
> }
>
> static void
> --
> 2.7.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 2/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range
2016-10-07 13:17 ` [PATCH v2 2/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range Michał Winiarski
2016-10-10 13:12 ` Mika Kuoppala
@ 2016-10-10 13:37 ` Joonas Lahtinen
1 sibling, 0 replies; 9+ messages in thread
From: Joonas Lahtinen @ 2016-10-10 13:37 UTC (permalink / raw)
To: Michał Winiarski, intel-gfx; +Cc: Mika Kuoppala
On pe, 2016-10-07 at 15:17 +0200, Michał Winiarski wrote:
> Let's use more top-down approach, where each gen8_ppgtt_clear_* function
> is responsible for clearing the struct passed as an argument and calling
> relevant clear_range functions on lower-level tables.
> Doing this rather than operating on PTE ranges makes the implementation
> of shrinking page tables quite simple.
>
> v2: Drop min when calculating num_entries, no negation in 48b ppgtt
> check, no newlines in vars block (Joonas)
>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Regards, Joonas
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v3 3/3] drm/i915/gtt: Free unused lower-level page tables
2016-10-07 18:39 ` [PATCH v3 " Michał Winiarski
2016-10-10 12:26 ` Chris Wilson
@ 2016-10-10 13:39 ` Joonas Lahtinen
1 sibling, 0 replies; 9+ messages in thread
From: Joonas Lahtinen @ 2016-10-10 13:39 UTC (permalink / raw)
To: Michał Winiarski, intel-gfx; +Cc: Mika Kuoppala
On pe, 2016-10-07 at 20:39 +0200, Michał Winiarski wrote:
> Since "Dynamic page table allocations" were introduced, our page tables
> can grow (being dynamically allocated) with address space range usage.
> Unfortunately, their lifetime is bound to vm. This is not a huge problem
> when we're not using softpin - drm_mm is creating an upper bound on used
> range by causing addresses for our VMAs to eventually be reused.
>
> With softpin, long lived contexts can drain the system out of memory
> even with a single "small" object. For example:
>
> bo = bo_alloc(size);
> while(true)
> offset += size;
> exec(bo, offset);
>
> Will cause us to create new allocations until all memory in the system
> is used for tracking GPU pages (even though almost all PTEs in this vm
> are pointing to scratch).
>
> Let's free unused page tables in clear_range to prevent this - if no
> entries are used, we can safely free it and return this information to
> the caller (so that higher-level entry is pointing to scratch).
>
> v2: Document return value and free semantics (Joonas)
> v3: No newlines in vars block (Joonas)
>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Now that we're using Patchwork for CI, better always send the whole
series so that testing catches it properly.
Once the CI results are in and OK, can be merged.
Regards, Joonas
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2016-10-10 13:39 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-10-07 13:17 [PATCH v2 1/3] drm/i915: Remove unused "valid" parameter from pte_encode Michał Winiarski
2016-10-07 13:17 ` [PATCH v2 2/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range Michał Winiarski
2016-10-10 13:12 ` Mika Kuoppala
2016-10-10 13:37 ` Joonas Lahtinen
2016-10-07 13:17 ` [PATCH v2 3/3] drm/i915/gtt: Free unused lower-level page tables Michał Winiarski
2016-10-07 18:39 ` [PATCH v3 " Michał Winiarski
2016-10-10 12:26 ` Chris Wilson
2016-10-10 13:39 ` Joonas Lahtinen
2016-10-10 12:14 ` ✗ Fi.CI.BAT: warning for series starting with [v2,1/3] drm/i915: Remove unused "valid" parameter from pte_encode (rev2) Patchwork
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox