* [PATCH 1/2] drm/i915: Make PTE valid encoding optional @ 2013-10-16 16:18 Ben Widawsky 2013-10-16 16:18 ` [PATCH 2/2] drm/i915: Disable GGTT PTEs on GEN6+ suspend Ben Widawsky 2013-10-18 13:40 ` [PATCH 1/2] drm/i915: Make PTE valid encoding optional Daniel Vetter 0 siblings, 2 replies; 12+ messages in thread From: Ben Widawsky @ 2013-10-16 16:18 UTC (permalink / raw) To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky Signed-off-by: Ben Widawsky <ben@bwidawsk.net> --- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/i915_gem_gtt.c | 35 ++++++++++++++++++++--------------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6106d3d..0cbeb0e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -517,7 +517,8 @@ struct i915_address_space { /* FIXME: Need a more generic return type */ gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr, - enum i915_cache_level level); + enum i915_cache_level level, + bool valid); /* Create a valid PTE */ void (*clear_range)(struct i915_address_space *vm, unsigned int first_entry, unsigned int num_entries); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e999496..81dce29 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -58,9 +58,10 @@ #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, - enum i915_cache_level level) + enum i915_cache_level level, + bool valid) { - gen6_gtt_pte_t pte = GEN6_PTE_VALID; + gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; pte |= GEN6_PTE_ADDR_ENCODE(addr); switch (level) { @@ -79,9 +80,10 @@ static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, } static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr, - enum i915_cache_level level) + enum i915_cache_level level, + bool valid) { - gen6_gtt_pte_t pte = GEN6_PTE_VALID; + gen6_gtt_pte_t pte = valid ? 
GEN6_PTE_VALID : 0; pte |= GEN6_PTE_ADDR_ENCODE(addr); switch (level) { @@ -105,9 +107,10 @@ static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr, #define BYT_PTE_SNOOPED_BY_CPU_CACHES (1 << 2) static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr, - enum i915_cache_level level) + enum i915_cache_level level, + bool valid) { - gen6_gtt_pte_t pte = GEN6_PTE_VALID; + gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; pte |= GEN6_PTE_ADDR_ENCODE(addr); /* Mark the page as writeable. Other platforms don't have a @@ -122,9 +125,10 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr, } static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr, - enum i915_cache_level level) + enum i915_cache_level level, + bool valid) { - gen6_gtt_pte_t pte = GEN6_PTE_VALID; + gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; pte |= HSW_PTE_ADDR_ENCODE(addr); if (level != I915_CACHE_NONE) @@ -134,9 +138,10 @@ static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr, } static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, - enum i915_cache_level level) + enum i915_cache_level level, + bool valid) { - gen6_gtt_pte_t pte = GEN6_PTE_VALID; + gen6_gtt_pte_t pte = valid ? 
GEN6_PTE_VALID : 0; pte |= HSW_PTE_ADDR_ENCODE(addr); switch (level) { @@ -245,7 +250,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; unsigned last_pte, i; - scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC); + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); while (num_entries) { last_pte = first_pte + num_entries; @@ -282,7 +287,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, dma_addr_t page_addr; page_addr = sg_page_iter_dma_address(&sg_iter); - pt_vaddr[act_pte] = vm->pte_encode(page_addr, cache_level); + pt_vaddr[act_pte] = vm->pte_encode(page_addr, cache_level, true); if (++act_pte == I915_PPGTT_PT_ENTRIES) { kunmap_atomic(pt_vaddr); act_pt++; @@ -536,7 +541,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { addr = sg_page_iter_dma_address(&sg_iter); - iowrite32(vm->pte_encode(addr, level), &gtt_entries[i]); + iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]); i++; } @@ -548,7 +553,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, */ if (i != 0) WARN_ON(readl(&gtt_entries[i-1]) != - vm->pte_encode(addr, level)); + vm->pte_encode(addr, level, true)); /* This next bit makes the above posting read even more important. We * want to flush the TLBs only after we're certain all the PTE updates @@ -573,7 +578,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, first_entry, num_entries, max_entries)) num_entries = max_entries; - scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC); + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); for (i = 0; i < num_entries; i++) iowrite32(scratch_pte, &gtt_base[i]); readl(gtt_base); -- 1.8.4 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 2/2] drm/i915: Disable GGTT PTEs on GEN6+ suspend 2013-10-16 16:18 [PATCH 1/2] drm/i915: Make PTE valid encoding optional Ben Widawsky @ 2013-10-16 16:18 ` Ben Widawsky 2013-10-16 16:21 ` [PATCH 2/2] [v2] " Ben Widawsky 2013-10-18 13:40 ` [PATCH 1/2] drm/i915: Make PTE valid encoding optional Daniel Vetter 1 sibling, 1 reply; 12+ messages in thread From: Ben Widawsky @ 2013-10-16 16:18 UTC (permalink / raw) To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky Once the machine gets to a certain point in the suspend process, we expect the GPU to be idle. If it is not, we might corrupt memory. Empirically (with an early version of this patch) we have seen this is not the case. We cannot currently explain why the latent GPU writes occur. In the technical sense, this patch is a workaround in that we have an issue we can't explain, and the patch indirectly solves the issue. However, it's really better than a workaround because we understand why it works, and it really should be a safe thing to do in all cases. The noticeable effect other than the debug messages would be an increase in the suspend time. I have not measured how expensive it actually is. I think it would be good to spend further time to root cause why we're seeing these latent writes, but it shouldn't preclude preventing the fallout. NOTE: It should be safe (and makes some sense IMO) to also keep the VALID bit unset on resume when we clear_range(). I've opted not to do this as properly clearing those bits at some later point would be extra work. 
Bugzilla: http://bugs.freedesktop.org/6549://bugs.freedesktop.org/show_bug.cgi?id=65496 Bugzilla: https://bugzilla.kernel.org/attachment.cgi?id=110401 Tested-by: Takashi Iwai <tiwai@suse.de> Tested-by: Paulo Zanoni <paulo.r.zanoni@intel.com> Signed-off-by: Ben Widawsky <ben@bwidawsk.net> --- drivers/gpu/drm/i915/i915_drv.c | 5 ++- drivers/gpu/drm/i915/i915_drv.h | 5 ++- drivers/gpu/drm/i915/i915_gem_gtt.c | 76 ++++++++++++++++++++++++++++++++----- drivers/gpu/drm/i915/i915_reg.h | 4 ++ 4 files changed, 78 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 59649c0..ea90c5f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -502,6 +502,8 @@ static int i915_drm_freeze(struct drm_device *dev) intel_modeset_suspend_hw(dev); } + i915_gem_suspend_gtt_mappings(dev); + i915_save_state(dev); intel_opregion_fini(dev); @@ -587,7 +589,8 @@ static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings) mutex_lock(&dev->struct_mutex); i915_gem_restore_gtt_mappings(dev); mutex_unlock(&dev->struct_mutex); - } + } else if (drm_core_check_feature(dev, DRIVER_MODESET)) + i915_check_and_clear_faults(dev); intel_init_power_well(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0cbeb0e..7ca99350 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -521,7 +521,8 @@ struct i915_address_space { bool valid); /* Create a valid PTE */ void (*clear_range)(struct i915_address_space *vm, unsigned int first_entry, - unsigned int num_entries); + unsigned int num_entries, + bool use_scratch); void (*insert_entries)(struct i915_address_space *vm, struct sg_table *st, unsigned int first_entry, @@ -2143,6 +2144,8 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt, void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_object *obj); +void i915_check_and_clear_faults(struct drm_device 
*dev); +void i915_gem_suspend_gtt_mappings(struct drm_device *dev); void i915_gem_restore_gtt_mappings(struct drm_device *dev); int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj); void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 81dce29..c4c42e7 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -241,7 +241,8 @@ static int gen6_ppgtt_enable(struct drm_device *dev) /* PPGTT support for Sandybdrige/Gen6 and later */ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, unsigned first_entry, - unsigned num_entries) + unsigned num_entries, + bool use_scratch) { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); @@ -372,7 +373,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) } ppgtt->base.clear_range(&ppgtt->base, 0, - ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES); + ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true); ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t); @@ -449,7 +450,8 @@ void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, { ppgtt->base.clear_range(&ppgtt->base, i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT, - obj->base.size >> PAGE_SHIFT); + obj->base.size >> PAGE_SHIFT, + true); } extern int intel_iommu_gfx_mapped; @@ -490,15 +492,65 @@ static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) dev_priv->mm.interruptible = interruptible; } +void i915_check_and_clear_faults(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring; + int i; + + if (INTEL_INFO(dev)->gen < 6) + return; + + for_each_ring(ring, dev_priv, i) { + u32 fault_reg; + fault_reg = I915_READ(RING_FAULT_REG(ring)); + if (fault_reg & RING_FAULT_VALID) { + DRM_DEBUG_DRIVER("Unexpected fault\n" + "\tAddr: 0x%08lx\\n" + "\tAddress space: %s\n" + "\tSource ID: 
%d\n" + "\tType: %d\n", + fault_reg & PAGE_MASK, + fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", + RING_FAULT_SRCID(fault_reg), + RING_FAULT_FAULT_TYPE(fault_reg)); + I915_WRITE(RING_FAULT_REG(ring), + fault_reg & ~RING_FAULT_VALID); + } + } + POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS])); +} + +void i915_gem_suspend_gtt_mappings(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + /* Don't bother messing with faults pre GEN6 as we have little + * documentation supporting that it's a good idea. + */ + if (INTEL_INFO(dev)->gen < 6) + return; + + i915_check_and_clear_faults(dev); + + dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, + dev_priv->gtt.base.start / PAGE_SIZE, + dev_priv->gtt.base.total / PAGE_SIZE, + false); +} + void i915_gem_restore_gtt_mappings(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; + i915_check_and_clear_faults(dev); + /* First fill our portion of the GTT with scratch pages */ dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, dev_priv->gtt.base.start / PAGE_SIZE, - dev_priv->gtt.base.total / PAGE_SIZE); + dev_priv->gtt.base.total / PAGE_SIZE, + true); list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { i915_gem_clflush_object(obj, obj->pin_display); @@ -565,7 +617,8 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, static void gen6_ggtt_clear_range(struct i915_address_space *vm, unsigned int first_entry, - unsigned int num_entries) + unsigned int num_entries, + bool use_scratch) { struct drm_i915_private *dev_priv = vm->dev->dev_private; gen6_gtt_pte_t scratch_pte, __iomem *gtt_base = @@ -578,7 +631,8 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, first_entry, num_entries, max_entries)) num_entries = max_entries; - scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch); + for (i = 
0; i < num_entries; i++) iowrite32(scratch_pte, &gtt_base[i]); readl(gtt_base); @@ -599,7 +653,8 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm, static void i915_ggtt_clear_range(struct i915_address_space *vm, unsigned int first_entry, - unsigned int num_entries) + unsigned int num_entries, + bool unused) { intel_gtt_clear_range(first_entry, num_entries); } @@ -627,7 +682,8 @@ void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, entry, - obj->base.size >> PAGE_SHIFT); + obj->base.size >> PAGE_SHIFT, + true); obj->has_global_gtt_mapping = 0; } @@ -714,11 +770,11 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, const unsigned long count = (hole_end - hole_start) / PAGE_SIZE; DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", hole_start, hole_end); - ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count); + ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count, true); } /* And finally clear the reserved guard page */ - ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1); + ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true); } static bool diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 13153c3..f98f584 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -656,6 +656,10 @@ #define ARB_MODE_SWIZZLE_IVB (1<<5) #define RENDER_HWS_PGA_GEN7 (0x04080) #define RING_FAULT_REG(ring) (0x4094 + 0x100*(ring)->id) +#define RING_FAULT_GTTSEL_MASK (1<<11) +#define RING_FAULT_SRCID(x) ((x >> 3) & 0xff) +#define RING_FAULT_FAULT_TYPE(x) ((x >> 1) & 0x3) +#define RING_FAULT_VALID (1<<0) #define DONE_REG 0x40b0 #define BSD_HWS_PGA_GEN7 (0x04180) #define BLT_HWS_PGA_GEN7 (0x04280) -- 1.8.4 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 2/2] [v2] drm/i915: Disable GGTT PTEs on GEN6+ suspend 2013-10-16 16:18 ` [PATCH 2/2] drm/i915: Disable GGTT PTEs on GEN6+ suspend Ben Widawsky @ 2013-10-16 16:21 ` Ben Widawsky 2013-10-16 16:58 ` Chris Wilson 2013-10-18 0:20 ` Todd Previte 0 siblings, 2 replies; 12+ messages in thread From: Ben Widawsky @ 2013-10-16 16:21 UTC (permalink / raw) To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky Once the machine gets to a certain point in the suspend process, we expect the GPU to be idle. If it is not, we might corrupt memory. Empirically (with an early version of this patch) we have seen this is not the case. We cannot currently explain why the latent GPU writes occur. In the technical sense, this patch is a workaround in that we have an issue we can't explain, and the patch indirectly solves the issue. However, it's really better than a workaround because we understand why it works, and it really should be a safe thing to do in all cases. The noticeable effect other than the debug messages would be an increase in the suspend time. I have not measured how expensive it actually is. I think it would be good to spend further time to root cause why we're seeing these latent writes, but it shouldn't preclude preventing the fallout. NOTE: It should be safe (and makes some sense IMO) to also keep the VALID bit unset on resume when we clear_range(). I've opted not to do this as properly clearing those bits at some later point would be extra work. 
v2: Fix bugzilla link Bugzilla: http://bugs.freedesktop.org/6549://bugs.freedesktop.org/show_bug.cgi?id=65496 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=59321 Tested-by: Takashi Iwai <tiwai@suse.de> Tested-by: Paulo Zanoni <paulo.r.zanoni@intel.com> Signed-off-by: Ben Widawsky <ben@bwidawsk.net> --- drivers/gpu/drm/i915/i915_drv.c | 5 ++- drivers/gpu/drm/i915/i915_drv.h | 5 ++- drivers/gpu/drm/i915/i915_gem_gtt.c | 76 ++++++++++++++++++++++++++++++++----- drivers/gpu/drm/i915/i915_reg.h | 4 ++ 4 files changed, 78 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 59649c0..ea90c5f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -502,6 +502,8 @@ static int i915_drm_freeze(struct drm_device *dev) intel_modeset_suspend_hw(dev); } + i915_gem_suspend_gtt_mappings(dev); + i915_save_state(dev); intel_opregion_fini(dev); @@ -587,7 +589,8 @@ static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings) mutex_lock(&dev->struct_mutex); i915_gem_restore_gtt_mappings(dev); mutex_unlock(&dev->struct_mutex); - } + } else if (drm_core_check_feature(dev, DRIVER_MODESET)) + i915_check_and_clear_faults(dev); intel_init_power_well(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0cbeb0e..7ca99350 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -521,7 +521,8 @@ struct i915_address_space { bool valid); /* Create a valid PTE */ void (*clear_range)(struct i915_address_space *vm, unsigned int first_entry, - unsigned int num_entries); + unsigned int num_entries, + bool use_scratch); void (*insert_entries)(struct i915_address_space *vm, struct sg_table *st, unsigned int first_entry, @@ -2143,6 +2144,8 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt, void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_object *obj); +void 
i915_check_and_clear_faults(struct drm_device *dev); +void i915_gem_suspend_gtt_mappings(struct drm_device *dev); void i915_gem_restore_gtt_mappings(struct drm_device *dev); int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj); void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 81dce29..c4c42e7 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -241,7 +241,8 @@ static int gen6_ppgtt_enable(struct drm_device *dev) /* PPGTT support for Sandybdrige/Gen6 and later */ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, unsigned first_entry, - unsigned num_entries) + unsigned num_entries, + bool use_scratch) { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); @@ -372,7 +373,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) } ppgtt->base.clear_range(&ppgtt->base, 0, - ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES); + ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true); ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t); @@ -449,7 +450,8 @@ void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, { ppgtt->base.clear_range(&ppgtt->base, i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT, - obj->base.size >> PAGE_SHIFT); + obj->base.size >> PAGE_SHIFT, + true); } extern int intel_iommu_gfx_mapped; @@ -490,15 +492,65 @@ static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) dev_priv->mm.interruptible = interruptible; } +void i915_check_and_clear_faults(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring; + int i; + + if (INTEL_INFO(dev)->gen < 6) + return; + + for_each_ring(ring, dev_priv, i) { + u32 fault_reg; + fault_reg = I915_READ(RING_FAULT_REG(ring)); + if (fault_reg & RING_FAULT_VALID) { + DRM_DEBUG_DRIVER("Unexpected fault\n" + "\tAddr: 
0x%08lx\\n" + "\tAddress space: %s\n" + "\tSource ID: %d\n" + "\tType: %d\n", + fault_reg & PAGE_MASK, + fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", + RING_FAULT_SRCID(fault_reg), + RING_FAULT_FAULT_TYPE(fault_reg)); + I915_WRITE(RING_FAULT_REG(ring), + fault_reg & ~RING_FAULT_VALID); + } + } + POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS])); +} + +void i915_gem_suspend_gtt_mappings(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + /* Don't bother messing with faults pre GEN6 as we have little + * documentation supporting that it's a good idea. + */ + if (INTEL_INFO(dev)->gen < 6) + return; + + i915_check_and_clear_faults(dev); + + dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, + dev_priv->gtt.base.start / PAGE_SIZE, + dev_priv->gtt.base.total / PAGE_SIZE, + false); +} + void i915_gem_restore_gtt_mappings(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; + i915_check_and_clear_faults(dev); + /* First fill our portion of the GTT with scratch pages */ dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, dev_priv->gtt.base.start / PAGE_SIZE, - dev_priv->gtt.base.total / PAGE_SIZE); + dev_priv->gtt.base.total / PAGE_SIZE, + true); list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { i915_gem_clflush_object(obj, obj->pin_display); @@ -565,7 +617,8 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, static void gen6_ggtt_clear_range(struct i915_address_space *vm, unsigned int first_entry, - unsigned int num_entries) + unsigned int num_entries, + bool use_scratch) { struct drm_i915_private *dev_priv = vm->dev->dev_private; gen6_gtt_pte_t scratch_pte, __iomem *gtt_base = @@ -578,7 +631,8 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, first_entry, num_entries, max_entries)) num_entries = max_entries; - scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); + scratch_pte = 
vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch); + for (i = 0; i < num_entries; i++) iowrite32(scratch_pte, &gtt_base[i]); readl(gtt_base); @@ -599,7 +653,8 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm, static void i915_ggtt_clear_range(struct i915_address_space *vm, unsigned int first_entry, - unsigned int num_entries) + unsigned int num_entries, + bool unused) { intel_gtt_clear_range(first_entry, num_entries); } @@ -627,7 +682,8 @@ void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, entry, - obj->base.size >> PAGE_SHIFT); + obj->base.size >> PAGE_SHIFT, + true); obj->has_global_gtt_mapping = 0; } @@ -714,11 +770,11 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, const unsigned long count = (hole_end - hole_start) / PAGE_SIZE; DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", hole_start, hole_end); - ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count); + ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count, true); } /* And finally clear the reserved guard page */ - ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1); + ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true); } static bool diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 13153c3..f98f584 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -656,6 +656,10 @@ #define ARB_MODE_SWIZZLE_IVB (1<<5) #define RENDER_HWS_PGA_GEN7 (0x04080) #define RING_FAULT_REG(ring) (0x4094 + 0x100*(ring)->id) +#define RING_FAULT_GTTSEL_MASK (1<<11) +#define RING_FAULT_SRCID(x) ((x >> 3) & 0xff) +#define RING_FAULT_FAULT_TYPE(x) ((x >> 1) & 0x3) +#define RING_FAULT_VALID (1<<0) #define DONE_REG 0x40b0 #define BSD_HWS_PGA_GEN7 (0x04180) #define BLT_HWS_PGA_GEN7 (0x04280) -- 1.8.4 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] [v2] drm/i915: Disable GGTT PTEs on GEN6+ suspend 2013-10-16 16:21 ` [PATCH 2/2] [v2] " Ben Widawsky @ 2013-10-16 16:58 ` Chris Wilson 2013-10-16 17:06 ` Ben Widawsky 2013-10-18 0:20 ` Todd Previte 1 sibling, 1 reply; 12+ messages in thread From: Chris Wilson @ 2013-10-16 16:58 UTC (permalink / raw) To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky On Wed, Oct 16, 2013 at 09:21:30AM -0700, Ben Widawsky wrote: > Once the machine gets to a certain point in the suspend process, we > expect the GPU to be idle. If it is not, we might corrupt memory. > Empirically (with an early version of this patch) we have seen this is > not the case. We cannot currently explain why the latent GPU writes > occur. > > In the technical sense, this patch is a workaround in that we have an > issue we can't explain, and the patch indirectly solves the issue. > However, it's really better than a workaround because we understand why > it works, and it really should be a safe thing to do in all cases. > > The noticeable effect other than the debug messages would be an increase > in the suspend time. I have not measure how expensive it actually is. > > I think it would be good to spend further time to root cause why we're > seeing these latent writes, but it shouldn't preclude preventing the > fallout. > > NOTE: It should be safe (and makes some sense IMO) to also keep the > VALID bit unset on resume when we clear_range(). I've opted not to do > this as properly clearing those bits at some later point would be extra > work. > > v2: Fix bugzilla link And the other one? > Bugzilla: http://bugs.freedesktop.org/6549://bugs.freedesktop.org/show_bug.cgi?id=65496 > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=59321 > Tested-by: Takashi Iwai <tiwai@suse.de> > Tested-by: Paulo Zanoni <paulo.r.zanoni@intel.com> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> So clearing the valid bit should result in the GPU reporting errors for delayed accesses, but none were reported? 
-Chris -- Chris Wilson, Intel Open Source Technology Centre ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] [v2] drm/i915: Disable GGTT PTEs on GEN6+ suspend 2013-10-16 16:58 ` Chris Wilson @ 2013-10-16 17:06 ` Ben Widawsky 2013-10-16 17:27 ` Chris Wilson 2013-10-18 13:45 ` Daniel Vetter 0 siblings, 2 replies; 12+ messages in thread From: Ben Widawsky @ 2013-10-16 17:06 UTC (permalink / raw) To: Chris Wilson, Ben Widawsky, Intel GFX On Wed, Oct 16, 2013 at 05:58:31PM +0100, Chris Wilson wrote: > On Wed, Oct 16, 2013 at 09:21:30AM -0700, Ben Widawsky wrote: > > Once the machine gets to a certain point in the suspend process, we > > expect the GPU to be idle. If it is not, we might corrupt memory. > > Empirically (with an early version of this patch) we have seen this is > > not the case. We cannot currently explain why the latent GPU writes > > occur. > > > > In the technical sense, this patch is a workaround in that we have an > > issue we can't explain, and the patch indirectly solves the issue. > > However, it's really better than a workaround because we understand why > > it works, and it really should be a safe thing to do in all cases. > > > > The noticeable effect other than the debug messages would be an increase > > in the suspend time. I have not measure how expensive it actually is. > > > > I think it would be good to spend further time to root cause why we're > > seeing these latent writes, but it shouldn't preclude preventing the > > fallout. > > > > NOTE: It should be safe (and makes some sense IMO) to also keep the > > VALID bit unset on resume when we clear_range(). I've opted not to do > > this as properly clearing those bits at some later point would be extra > > work. > > > > v2: Fix bugzilla link > > And the other one? > I'm really amazing. 
If we move ahead with this patch, Daniel, can you just erase the extra bugs.freedesktop.org/6549:// > > Bugzilla: http://bugs.freedesktop.org/6549://bugs.freedesktop.org/show_bug.cgi?id=65496 Bugzilla: http://bugs.freedesktop.org/show_bug.cgi?id=65496 > > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=59321 > > Tested-by: Takashi Iwai <tiwai@suse.de> > > Tested-by: Paulo Zanoni <paulo.r.zanoni@intel.com> > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> > > So clearing the valid bit should result in the GPU reporting errors for > delayed accesses, but none were reported? > -Chris > So I can't actually reproduce the problem for some reason. Paulo will need to answer. One theory is the fault information is lost on suspend. The original patch put faults both in suspend, and resume. After this, I asked Paulo to wedge the GPU, and there I saw faults. -- Ben Widawsky, Intel Open Source Technology Center ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] [v2] drm/i915: Disable GGTT PTEs on GEN6+ suspend 2013-10-16 17:06 ` Ben Widawsky @ 2013-10-16 17:27 ` Chris Wilson 2013-10-17 7:41 ` Takashi Iwai 2013-10-18 13:45 ` Daniel Vetter 1 sibling, 1 reply; 12+ messages in thread From: Chris Wilson @ 2013-10-16 17:27 UTC (permalink / raw) To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky On Wed, Oct 16, 2013 at 10:06:27AM -0700, Ben Widawsky wrote: > On Wed, Oct 16, 2013 at 05:58:31PM +0100, Chris Wilson wrote: > > So clearing the valid bit should result in the GPU reporting errors for > > delayed accesses, but none were reported? > > So I can't actually reproduce the problem for some reason. Paulo will > need to answer. One theory is the fault information is lost on suspend. > > The original patch put faults both in suspend, and resume. After this, I > asked Paulo to wedge the GPU, and there I saw faults. If we can capture the error, and it should be very possible to do so, we should be able to pinpoint the cause quite quickly. If it is just deferred writes, it should also be a problem across module unload - which should be easier for getting debug information out. -Chris -- Chris Wilson, Intel Open Source Technology Centre ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] [v2] drm/i915: Disable GGTT PTEs on GEN6+ suspend 2013-10-16 17:27 ` Chris Wilson @ 2013-10-17 7:41 ` Takashi Iwai 2013-10-17 9:24 ` Chris Wilson 0 siblings, 1 reply; 12+ messages in thread From: Takashi Iwai @ 2013-10-17 7:41 UTC (permalink / raw) To: Chris Wilson; +Cc: Ben Widawsky, Intel GFX, Ben Widawsky At Wed, 16 Oct 2013 18:27:33 +0100, Chris Wilson wrote: > > On Wed, Oct 16, 2013 at 10:06:27AM -0700, Ben Widawsky wrote: > > On Wed, Oct 16, 2013 at 05:58:31PM +0100, Chris Wilson wrote: > > > So clearing the valid bit should result in the GPU reporting errors for > > > delayed accesses, but none were reported? > > > > So I can't actually reproduce the problem for some reason. Paulo will > > need to answer. One theory is the fault information is lost on suspend. > > > > The original patch put faults both in suspend, and resume. After this, I > > asked Paulo to wedge the GPU, and there I saw faults. > > If we can capture the error, and it should be very possible to do so, we > should be able to pinpoint the cause quite quickly. If it is just deferred > writes, it should also be a problem across module unload - which should > be easier for getting debug information out. The bug is only about S4, thus it's not so easy to capture anything in the resume kernel, as all lost after transition to the restored kernel. BTW, I also suspect that the similar problem might still happen in other cases, e.g. via kexec even with this patch. thanks, Takashi ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] [v2] drm/i915: Disable GGTT PTEs on GEN6+ suspend 2013-10-17 7:41 ` Takashi Iwai @ 2013-10-17 9:24 ` Chris Wilson 2013-10-17 10:06 ` Takashi Iwai 0 siblings, 1 reply; 12+ messages in thread From: Chris Wilson @ 2013-10-17 9:24 UTC (permalink / raw) To: Takashi Iwai; +Cc: Ben Widawsky, Intel GFX, Ben Widawsky On Thu, Oct 17, 2013 at 09:41:09AM +0200, Takashi Iwai wrote: > At Wed, 16 Oct 2013 18:27:33 +0100, > Chris Wilson wrote: > > > > On Wed, Oct 16, 2013 at 10:06:27AM -0700, Ben Widawsky wrote: > > > On Wed, Oct 16, 2013 at 05:58:31PM +0100, Chris Wilson wrote: > > > > So clearing the valid bit should result in the GPU reporting errors for > > > > delayed accesses, but none were reported? > > > > > > So I can't actually reproduce the problem for some reason. Paulo will > > > need to answer. One theory is the fault information is lost on suspend. > > > > > > The original patch put faults both in suspend, and resume. After this, I > > > asked Paulo to wedge the GPU, and there I saw faults. > > > > If we can capture the error, and it should be very possible to do so, we > > should be able to pinpoint the cause quite quickly. If it is just deferred > > writes, it should also be a problem across module unload - which should > > be easier for getting debug information out. > > The bug is only about S4, thus it's not so easy to capture anything in > the resume kernel, as all lost after transition to the restored > kernel. > > BTW, I also suspect that the similar problem might still happen in > other cases, e.g. via kexec even with this patch. How are devices idled (or suspended) prior to hibernate resume or kexec? From my reading, i915_drm_freeze() should be called before the resume image is executed. What we can do is to make the first action of i915_driver_unload() be i915_drm_freeze(), then clear the PTE valid bits and wait a second or two for a GPU fault before proceeding with an unload. 
By doing that we can debug our suspend paths - all that remains is the possibility of rogue hardware state. And that should show up by breaking module load. -Chris -- Chris Wilson, Intel Open Source Technology Centre ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] [v2] drm/i915: Disable GGTT PTEs on GEN6+ suspend 2013-10-17 9:24 ` Chris Wilson @ 2013-10-17 10:06 ` Takashi Iwai 0 siblings, 0 replies; 12+ messages in thread From: Takashi Iwai @ 2013-10-17 10:06 UTC (permalink / raw) To: Chris Wilson; +Cc: Ben Widawsky, Intel GFX, Ben Widawsky At Thu, 17 Oct 2013 10:24:07 +0100, Chris Wilson wrote: > > On Thu, Oct 17, 2013 at 09:41:09AM +0200, Takashi Iwai wrote: > > At Wed, 16 Oct 2013 18:27:33 +0100, > > Chris Wilson wrote: > > > > > > On Wed, Oct 16, 2013 at 10:06:27AM -0700, Ben Widawsky wrote: > > > > On Wed, Oct 16, 2013 at 05:58:31PM +0100, Chris Wilson wrote: > > > > > So clearing the valid bit should result in the GPU reporting errors for > > > > > delayed accesses, but none were reported? > > > > > > > > So I can't actually reproduce the problem for some reason. Paulo will > > > > need to answer. One theory is the fault information is lost on suspend. > > > > > > > > The original patch put faults both in suspend, and resume. After this, I > > > > asked Paulo to wedge the GPU, and there I saw faults. > > > > > > If we can capture the error, and it should be very possible to do so, we > > > should be able to pinpoint the cause quite quickly. If it is just deferred > > > writes, it should also be a problem across module unload - which should > > > be easier for getting debug information out. > > > > The bug is only about S4, thus it's not so easy to capture anything in > > the resume kernel, as all lost after transition to the restored > > kernel. > > > > BTW, I also suspect that the similar problem might still happen in > > other cases, e.g. via kexec even with this patch. > > How are devices idled (or suspended) prior to hibernate resume or kexec? > >From my reading, i915_drm_freeze() should be called before the resume > image is executed. 
I also didn't follow the complete (and complex) flow, but from my understanding, S4 case: hibernation_restore() in kernel/power/hibernate.c calls dpm_suspend_start(PMSG_QUIESCE), which invokes pm->freeze in the end. Since there is no pm->freeze_noirq, dpm_suspend_end(PMSG_QUIESCE) in resume_target_kernel() shouldn't matter. kexec case: it's usually shutdown ops called from kernel_restart_prepare() -> device_shutdown(). So, it's same as the normal shutdown. When KEXEC_PRESERVE_CONTEXT flag is set (where it works like suspend/resume), dpm_suspend_start(PMSG_FREEZE) will be called, which again invokes pm->freeze. i915 driver has no shutdown ops, and it's good so (we'd like to see the messages), but this means the device is still active at the normal kexec until the very latest stage, I'm afraid. > What we can do is to make the first action of > i915_driver_unload() be i915_drm_freeze(), then clear the PTE valid > bits and wait a second or two for a GPU fault before proceeding with an > unload. By doing that we can debug our suspend paths - all that remains > is the possibility of rogue hardware state. And that should show up by > breaking module load. Well, I somehow think the problem happens at transition to the restored image, where we have completely different memory maps from the boot kernel and this leads to memory corruption in /proc dcache or such. With unload / reload module case, the rest memory is preserved, thus it's a fairly different situation. Of course, I'm not against testing this at all. Just trying to understand what's going on... thanks, Takashi ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] [v2] drm/i915: Disable GGTT PTEs on GEN6+ suspend 2013-10-16 17:06 ` Ben Widawsky 2013-10-16 17:27 ` Chris Wilson @ 2013-10-18 13:45 ` Daniel Vetter 1 sibling, 0 replies; 12+ messages in thread From: Daniel Vetter @ 2013-10-18 13:45 UTC (permalink / raw) To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky On Wed, Oct 16, 2013 at 10:06:27AM -0700, Ben Widawsky wrote: > On Wed, Oct 16, 2013 at 05:58:31PM +0100, Chris Wilson wrote: > > On Wed, Oct 16, 2013 at 09:21:30AM -0700, Ben Widawsky wrote: > > > Once the machine gets to a certain point in the suspend process, we > > > expect the GPU to be idle. If it is not, we might corrupt memory. > > > Empirically (with an early version of this patch) we have seen this is > > > not the case. We cannot currently explain why the latent GPU writes > > > occur. > > > > > > In the technical sense, this patch is a workaround in that we have an > > > issue we can't explain, and the patch indirectly solves the issue. > > > However, it's really better than a workaround because we understand why > > > it works, and it really should be a safe thing to do in all cases. > > > > > > The noticeable effect other than the debug messages would be an increase > > > in the suspend time. I have not measure how expensive it actually is. > > > > > > I think it would be good to spend further time to root cause why we're > > > seeing these latent writes, but it shouldn't preclude preventing the > > > fallout. > > > > > > NOTE: It should be safe (and makes some sense IMO) to also keep the > > > VALID bit unset on resume when we clear_range(). I've opted not to do > > > this as properly clearing those bits at some later point would be extra > > > work. > > > > > > v2: Fix bugzilla link > > > > And the other one? > > > > I'm really amazing. 
If we move ahead with this patch, Daniel, can you just erase > the extra bugs.freedesktop.org/6549:// > > > > Bugzilla: http://bugs.freedesktop.org/6549://bugs.freedesktop.org/show_bug.cgi?id=65496 > > Bugzilla: http://bugs.freedesktop.org/show_bug.cgi?id=65496 Fixed and merged with cc: stable. -Daniel > > > > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=59321 > > > Tested-by: Takashi Iwai <tiwai@suse.de> > > > Tested-by: Paulo Zanoni <paulo.r.zanoni@intel.com> > > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> > > > > So clearing the valid bit should result in the GPU reporting errors for > > delayed accesses, but none were reported? > > -Chris > > > > So I can't actually reproduce the problem for some reason. Paulo will > need to answer. One theory is the fault information is lost on suspend. > > The original patch put faults both in suspend, and resume. After this, I > asked Paulo to wedge the GPU, and there I saw faults. > > -- > Ben Widawsky, Intel Open Source Technology Center > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] [v2] drm/i915: Disable GGTT PTEs on GEN6+ suspend 2013-10-16 16:21 ` [PATCH 2/2] [v2] " Ben Widawsky 2013-10-16 16:58 ` Chris Wilson @ 2013-10-18 0:20 ` Todd Previte 1 sibling, 0 replies; 12+ messages in thread From: Todd Previte @ 2013-10-18 0:20 UTC (permalink / raw) To: intel-gfx On 10/16/13 9:21 AM, Ben Widawsky wrote: > Once the machine gets to a certain point in the suspend process, we > expect the GPU to be idle. If it is not, we might corrupt memory. > Empirically (with an early version of this patch) we have seen this is > not the case. We cannot currently explain why the latent GPU writes > occur. > > In the technical sense, this patch is a workaround in that we have an > issue we can't explain, and the patch indirectly solves the issue. > However, it's really better than a workaround because we understand why > it works, and it really should be a safe thing to do in all cases. > > The noticeable effect other than the debug messages would be an increase > in the suspend time. I have not measure how expensive it actually is. > > I think it would be good to spend further time to root cause why we're > seeing these latent writes, but it shouldn't preclude preventing the > fallout. > > NOTE: It should be safe (and makes some sense IMO) to also keep the > VALID bit unset on resume when we clear_range(). I've opted not to do > this as properly clearing those bits at some later point would be extra > work. 
> > v2: Fix bugzilla link > > Bugzilla: http://bugs.freedesktop.org/6549://bugs.freedesktop.org/show_bug.cgi?id=65496 > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=59321 > Tested-by: Takashi Iwai <tiwai@suse.de> > Tested-by: Paulo Zanoni <paulo.r.zanoni@intel.com> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> Tested-By: Todd Previte <tprevite@gmail.com> > --- > drivers/gpu/drm/i915/i915_drv.c | 5 ++- > drivers/gpu/drm/i915/i915_drv.h | 5 ++- > drivers/gpu/drm/i915/i915_gem_gtt.c | 76 ++++++++++++++++++++++++++++++++----- > drivers/gpu/drm/i915/i915_reg.h | 4 ++ > 4 files changed, 78 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index 59649c0..ea90c5f 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -502,6 +502,8 @@ static int i915_drm_freeze(struct drm_device *dev) > intel_modeset_suspend_hw(dev); > } > > + i915_gem_suspend_gtt_mappings(dev); > + > i915_save_state(dev); > > intel_opregion_fini(dev); > @@ -587,7 +589,8 @@ static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings) > mutex_lock(&dev->struct_mutex); > i915_gem_restore_gtt_mappings(dev); > mutex_unlock(&dev->struct_mutex); > - } > + } else if (drm_core_check_feature(dev, DRIVER_MODESET)) > + i915_check_and_clear_faults(dev); > > intel_init_power_well(dev); > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 0cbeb0e..7ca99350 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -521,7 +521,8 @@ struct i915_address_space { > bool valid); /* Create a valid PTE */ > void (*clear_range)(struct i915_address_space *vm, > unsigned int first_entry, > - unsigned int num_entries); > + unsigned int num_entries, > + bool use_scratch); > void (*insert_entries)(struct i915_address_space *vm, > struct sg_table *st, > unsigned int first_entry, > @@ -2143,6 +2144,8 @@ void i915_ppgtt_bind_object(struct 
i915_hw_ppgtt *ppgtt, > void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, > struct drm_i915_gem_object *obj); > > +void i915_check_and_clear_faults(struct drm_device *dev); > +void i915_gem_suspend_gtt_mappings(struct drm_device *dev); > void i915_gem_restore_gtt_mappings(struct drm_device *dev); > int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj); > void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj, > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 81dce29..c4c42e7 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -241,7 +241,8 @@ static int gen6_ppgtt_enable(struct drm_device *dev) > /* PPGTT support for Sandybdrige/Gen6 and later */ > static void gen6_ppgtt_clear_range(struct i915_address_space *vm, > unsigned first_entry, > - unsigned num_entries) > + unsigned num_entries, > + bool use_scratch) > { > struct i915_hw_ppgtt *ppgtt = > container_of(vm, struct i915_hw_ppgtt, base); > @@ -372,7 +373,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) > } > > ppgtt->base.clear_range(&ppgtt->base, 0, > - ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES); > + ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true); > > ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t); > > @@ -449,7 +450,8 @@ void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, > { > ppgtt->base.clear_range(&ppgtt->base, > i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT, > - obj->base.size >> PAGE_SHIFT); > + obj->base.size >> PAGE_SHIFT, > + true); > } > > extern int intel_iommu_gfx_mapped; > @@ -490,15 +492,65 @@ static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) > dev_priv->mm.interruptible = interruptible; > } > > +void i915_check_and_clear_faults(struct drm_device *dev) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_ring_buffer *ring; > + int i; > + > + if 
(INTEL_INFO(dev)->gen < 6) > + return; > + > + for_each_ring(ring, dev_priv, i) { > + u32 fault_reg; > + fault_reg = I915_READ(RING_FAULT_REG(ring)); > + if (fault_reg & RING_FAULT_VALID) { > + DRM_DEBUG_DRIVER("Unexpected fault\n" > + "\tAddr: 0x%08lx\\n" > + "\tAddress space: %s\n" > + "\tSource ID: %d\n" > + "\tType: %d\n", > + fault_reg & PAGE_MASK, > + fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", > + RING_FAULT_SRCID(fault_reg), > + RING_FAULT_FAULT_TYPE(fault_reg)); > + I915_WRITE(RING_FAULT_REG(ring), > + fault_reg & ~RING_FAULT_VALID); > + } > + } > + POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS])); > +} > + > +void i915_gem_suspend_gtt_mappings(struct drm_device *dev) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + > + /* Don't bother messing with faults pre GEN6 as we have little > + * documentation supporting that it's a good idea. > + */ > + if (INTEL_INFO(dev)->gen < 6) > + return; > + > + i915_check_and_clear_faults(dev); > + > + dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, > + dev_priv->gtt.base.start / PAGE_SIZE, > + dev_priv->gtt.base.total / PAGE_SIZE, > + false); > +} > + > void i915_gem_restore_gtt_mappings(struct drm_device *dev) > { > struct drm_i915_private *dev_priv = dev->dev_private; > struct drm_i915_gem_object *obj; > > + i915_check_and_clear_faults(dev); > + > /* First fill our portion of the GTT with scratch pages */ > dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, > dev_priv->gtt.base.start / PAGE_SIZE, > - dev_priv->gtt.base.total / PAGE_SIZE); > + dev_priv->gtt.base.total / PAGE_SIZE, > + true); > > list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { > i915_gem_clflush_object(obj, obj->pin_display); > @@ -565,7 +617,8 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, > > static void gen6_ggtt_clear_range(struct i915_address_space *vm, > unsigned int first_entry, > - unsigned int num_entries) > + unsigned int num_entries, > + bool use_scratch) > { > struct 
drm_i915_private *dev_priv = vm->dev->dev_private; > gen6_gtt_pte_t scratch_pte, __iomem *gtt_base = > @@ -578,7 +631,8 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, > first_entry, num_entries, max_entries)) > num_entries = max_entries; > > - scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); > + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch); > + > for (i = 0; i < num_entries; i++) > iowrite32(scratch_pte, &gtt_base[i]); > readl(gtt_base); > @@ -599,7 +653,8 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm, > > static void i915_ggtt_clear_range(struct i915_address_space *vm, > unsigned int first_entry, > - unsigned int num_entries) > + unsigned int num_entries, > + bool unused) > { > intel_gtt_clear_range(first_entry, num_entries); > } > @@ -627,7 +682,8 @@ void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) > > dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, > entry, > - obj->base.size >> PAGE_SHIFT); > + obj->base.size >> PAGE_SHIFT, > + true); > > obj->has_global_gtt_mapping = 0; > } > @@ -714,11 +770,11 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, > const unsigned long count = (hole_end - hole_start) / PAGE_SIZE; > DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", > hole_start, hole_end); > - ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count); > + ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count, true); > } > > /* And finally clear the reserved guard page */ > - ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1); > + ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true); > } > > static bool > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 13153c3..f98f584 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -656,6 +656,10 @@ > #define ARB_MODE_SWIZZLE_IVB (1<<5) > #define RENDER_HWS_PGA_GEN7 (0x04080) > #define 
RING_FAULT_REG(ring) (0x4094 + 0x100*(ring)->id) > +#define RING_FAULT_GTTSEL_MASK (1<<11) > +#define RING_FAULT_SRCID(x) ((x >> 3) & 0xff) > +#define RING_FAULT_FAULT_TYPE(x) ((x >> 1) & 0x3) > +#define RING_FAULT_VALID (1<<0) > #define DONE_REG 0x40b0 > #define BSD_HWS_PGA_GEN7 (0x04180) > #define BLT_HWS_PGA_GEN7 (0x04280) ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/2] drm/i915: Make PTE valid encoding optional 2013-10-16 16:18 [PATCH 1/2] drm/i915: Make PTE valid encoding optional Ben Widawsky 2013-10-16 16:18 ` [PATCH 2/2] drm/i915: Disable GGTT PTEs on GEN6+ suspend Ben Widawsky @ 2013-10-18 13:40 ` Daniel Vetter 1 sibling, 0 replies; 12+ messages in thread From: Daniel Vetter @ 2013-10-18 13:40 UTC (permalink / raw) To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky On Wed, Oct 16, 2013 at 09:18:21AM -0700, Ben Widawsky wrote: Empty commit messages aren't cool, especially for a fix this late in the -rc cycle. Patches simply need to be justified. I've filled something in when merging. > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> > --- > drivers/gpu/drm/i915/i915_drv.h | 3 ++- > drivers/gpu/drm/i915/i915_gem_gtt.c | 35 ++++++++++++++++++++--------------- > 2 files changed, 22 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 6106d3d..0cbeb0e 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -517,7 +517,8 @@ struct i915_address_space { > > /* FIXME: Need a more generic return type */ > gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr, > - enum i915_cache_level level); > + enum i915_cache_level level, > + bool valid); /* Create a valid PTE */ > void (*clear_range)(struct i915_address_space *vm, > unsigned int first_entry, > unsigned int num_entries); > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index e999496..81dce29 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -58,9 +58,10 @@ > #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) > > static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, > - enum i915_cache_level level) > + enum i915_cache_level level, > + bool valid) > { > - gen6_gtt_pte_t pte = GEN6_PTE_VALID; > + gen6_gtt_pte_t pte = valid ? 
GEN6_PTE_VALID : 0; > pte |= GEN6_PTE_ADDR_ENCODE(addr); > > switch (level) { > @@ -79,9 +80,10 @@ static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, > } > > static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr, > - enum i915_cache_level level) > + enum i915_cache_level level, > + bool valid) > { > - gen6_gtt_pte_t pte = GEN6_PTE_VALID; > + gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; > pte |= GEN6_PTE_ADDR_ENCODE(addr); > > switch (level) { > @@ -105,9 +107,10 @@ static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr, > #define BYT_PTE_SNOOPED_BY_CPU_CACHES (1 << 2) > > static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr, > - enum i915_cache_level level) > + enum i915_cache_level level, > + bool valid) > { > - gen6_gtt_pte_t pte = GEN6_PTE_VALID; > + gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; > pte |= GEN6_PTE_ADDR_ENCODE(addr); > > /* Mark the page as writeable. Other platforms don't have a > @@ -122,9 +125,10 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr, > } > > static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr, > - enum i915_cache_level level) > + enum i915_cache_level level, > + bool valid) > { > - gen6_gtt_pte_t pte = GEN6_PTE_VALID; > + gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; > pte |= HSW_PTE_ADDR_ENCODE(addr); > > if (level != I915_CACHE_NONE) > @@ -134,9 +138,10 @@ static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr, > } > > static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, > - enum i915_cache_level level) > + enum i915_cache_level level, > + bool valid) > { > - gen6_gtt_pte_t pte = GEN6_PTE_VALID; > + gen6_gtt_pte_t pte = valid ? 
GEN6_PTE_VALID : 0; > pte |= HSW_PTE_ADDR_ENCODE(addr); > > switch (level) { > @@ -245,7 +250,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, > unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; > unsigned last_pte, i; > > - scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC); > + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); > > while (num_entries) { > last_pte = first_pte + num_entries; > @@ -282,7 +287,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, > dma_addr_t page_addr; > > page_addr = sg_page_iter_dma_address(&sg_iter); > - pt_vaddr[act_pte] = vm->pte_encode(page_addr, cache_level); > + pt_vaddr[act_pte] = vm->pte_encode(page_addr, cache_level, true); > if (++act_pte == I915_PPGTT_PT_ENTRIES) { > kunmap_atomic(pt_vaddr); > act_pt++; > @@ -536,7 +541,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, > > for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { > addr = sg_page_iter_dma_address(&sg_iter); > - iowrite32(vm->pte_encode(addr, level), &gtt_entries[i]); > + iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]); > i++; > } > > @@ -548,7 +553,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, > */ > if (i != 0) > WARN_ON(readl(&gtt_entries[i-1]) != > - vm->pte_encode(addr, level)); > + vm->pte_encode(addr, level, true)); > > /* This next bit makes the above posting read even more important. 
We > * want to flush the TLBs only after we're certain all the PTE updates > @@ -573,7 +578,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, > first_entry, num_entries, max_entries)) > num_entries = max_entries; > > - scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC); > + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); > for (i = 0; i < num_entries; i++) > iowrite32(scratch_pte, &gtt_base[i]); > readl(gtt_base); > -- > 1.8.4 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2013-10-18 13:45 UTC | newest] Thread overview: 12+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2013-10-16 16:18 [PATCH 1/2] drm/i915: Make PTE valid encoding optional Ben Widawsky 2013-10-16 16:18 ` [PATCH 2/2] drm/i915: Disable GGTT PTEs on GEN6+ suspend Ben Widawsky 2013-10-16 16:21 ` [PATCH 2/2] [v2] " Ben Widawsky 2013-10-16 16:58 ` Chris Wilson 2013-10-16 17:06 ` Ben Widawsky 2013-10-16 17:27 ` Chris Wilson 2013-10-17 7:41 ` Takashi Iwai 2013-10-17 9:24 ` Chris Wilson 2013-10-17 10:06 ` Takashi Iwai 2013-10-18 13:45 ` Daniel Vetter 2013-10-18 0:20 ` Todd Previte 2013-10-18 13:40 ` [PATCH 1/2] drm/i915: Make PTE valid encoding optional Daniel Vetter
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox