From mboxrd@z Thu Jan 1 00:00:00 1970 From: Todd Previte Subject: Re: [PATCH 2/2] [v2] drm/i915: Disable GGTT PTEs on GEN6+ suspend Date: Thu, 17 Oct 2013 17:20:33 -0700 Message-ID: <52607ED1.8010904@gmail.com> References: <1381940302-1920-2-git-send-email-benjamin.widawsky@intel.com> <1381940490-2082-1-git-send-email-benjamin.widawsky@intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: Received: from mail-pd0-f175.google.com (mail-pd0-f175.google.com [209.85.192.175]) by gabe.freedesktop.org (Postfix) with ESMTP id D6C13E7103 for ; Thu, 17 Oct 2013 17:20:36 -0700 (PDT) Received: by mail-pd0-f175.google.com with SMTP id g10so2895408pdj.20 for ; Thu, 17 Oct 2013 17:20:36 -0700 (PDT) In-Reply-To: <1381940490-2082-1-git-send-email-benjamin.widawsky@intel.com> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: intel-gfx-bounces+gcfxdi-intel-gfx=m.gmane.org@lists.freedesktop.org Errors-To: intel-gfx-bounces+gcfxdi-intel-gfx=m.gmane.org@lists.freedesktop.org To: intel-gfx@lists.freedesktop.org List-Id: intel-gfx@lists.freedesktop.org On 10/16/13 9:21 AM, Ben Widawsky wrote: > Once the machine gets to a certain point in the suspend process, we > expect the GPU to be idle. If it is not, we might corrupt memory. > Empirically (with an early version of this patch) we have seen this is > not the case. We cannot currently explain why the latent GPU writes > occur. > > In the technical sense, this patch is a workaround in that we have an > issue we can't explain, and the patch indirectly solves the issue. > However, it's really better than a workaround because we understand why > it works, and it really should be a safe thing to do in all cases. > > The noticeable effect other than the debug messages would be an increase > in the suspend time. I have not measure how expensive it actually is. > > I think it would be good to spend further time to root cause why we're > seeing these latent writes, but it shouldn't preclude preventing the > fallout. > > NOTE: It should be safe (and makes some sense IMO) to also keep the > VALID bit unset on resume when we clear_range(). I've opted not to do > this as properly clearing those bits at some later point would be extra > work. > > v2: Fix bugzilla link > > Bugzilla: http://bugs.freedesktop.org/6549://bugs.freedesktop.org/show_bug.cgi?id=65496 > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=59321 > Tested-by: Takashi Iwai > Tested-by: Paulo Zanoni > Signed-off-by: Ben Widawsky Tested-By: Todd Previte > --- > drivers/gpu/drm/i915/i915_drv.c | 5 ++- > drivers/gpu/drm/i915/i915_drv.h | 5 ++- > drivers/gpu/drm/i915/i915_gem_gtt.c | 76 ++++++++++++++++++++++++++++++++----- > drivers/gpu/drm/i915/i915_reg.h | 4 ++ > 4 files changed, 78 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index 59649c0..ea90c5f 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -502,6 +502,8 @@ static int i915_drm_freeze(struct drm_device *dev) > intel_modeset_suspend_hw(dev); > } > > + i915_gem_suspend_gtt_mappings(dev); > + > i915_save_state(dev); > > intel_opregion_fini(dev); > @@ -587,7 +589,8 @@ static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings) > mutex_lock(&dev->struct_mutex); > i915_gem_restore_gtt_mappings(dev); > mutex_unlock(&dev->struct_mutex); > - } > + } else if (drm_core_check_feature(dev, DRIVER_MODESET)) > + i915_check_and_clear_faults(dev); > > intel_init_power_well(dev); > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 0cbeb0e..7ca99350 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -521,7 +521,8 @@ struct i915_address_space { > bool valid); /* Create a valid PTE */ > void (*clear_range)(struct i915_address_space *vm, > unsigned int first_entry, > - unsigned int num_entries); > + unsigned int num_entries, > + bool use_scratch); > void (*insert_entries)(struct i915_address_space *vm, > struct sg_table *st, > unsigned int first_entry, > @@ -2143,6 +2144,8 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt, > void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, > struct drm_i915_gem_object *obj); > > +void i915_check_and_clear_faults(struct drm_device *dev); > +void i915_gem_suspend_gtt_mappings(struct drm_device *dev); > void i915_gem_restore_gtt_mappings(struct drm_device *dev); > int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj); > void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj, > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 81dce29..c4c42e7 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -241,7 +241,8 @@ static int gen6_ppgtt_enable(struct drm_device *dev) > /* PPGTT support for Sandybdrige/Gen6 and later */ > static void gen6_ppgtt_clear_range(struct i915_address_space *vm, > unsigned first_entry, > - unsigned num_entries) > + unsigned num_entries, > + bool use_scratch) > { > struct i915_hw_ppgtt *ppgtt = > container_of(vm, struct i915_hw_ppgtt, base); > @@ -372,7 +373,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) > } > > ppgtt->base.clear_range(&ppgtt->base, 0, > - ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES); > + ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true); > > ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t); > > @@ -449,7 +450,8 @@ void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, > { > ppgtt->base.clear_range(&ppgtt->base, > i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT, > - obj->base.size >> PAGE_SHIFT); > + obj->base.size >> PAGE_SHIFT, > + true); > } > > extern int intel_iommu_gfx_mapped; > @@ -490,15 +492,65 @@ static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) > dev_priv->mm.interruptible = interruptible; > } > > +void i915_check_and_clear_faults(struct drm_device *dev) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_ring_buffer *ring; > + int i; > + > + if (INTEL_INFO(dev)->gen < 6) > + return; > + > + for_each_ring(ring, dev_priv, i) { > + u32 fault_reg; > + fault_reg = I915_READ(RING_FAULT_REG(ring)); > + if (fault_reg & RING_FAULT_VALID) { > + DRM_DEBUG_DRIVER("Unexpected fault\n" > + "\tAddr: 0x%08lx\\n" > + "\tAddress space: %s\n" > + "\tSource ID: %d\n" > + "\tType: %d\n", > + fault_reg & PAGE_MASK, > + fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", > + RING_FAULT_SRCID(fault_reg), > + RING_FAULT_FAULT_TYPE(fault_reg)); > + I915_WRITE(RING_FAULT_REG(ring), > + fault_reg & ~RING_FAULT_VALID); > + } > + } > + POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS])); > +} > + > +void i915_gem_suspend_gtt_mappings(struct drm_device *dev) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + > + /* Don't bother messing with faults pre GEN6 as we have little > + * documentation supporting that it's a good idea. > + */ > + if (INTEL_INFO(dev)->gen < 6) > + return; > + > + i915_check_and_clear_faults(dev); > + > + dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, > + dev_priv->gtt.base.start / PAGE_SIZE, > + dev_priv->gtt.base.total / PAGE_SIZE, > + false); > +} > + > void i915_gem_restore_gtt_mappings(struct drm_device *dev) > { > struct drm_i915_private *dev_priv = dev->dev_private; > struct drm_i915_gem_object *obj; > > + i915_check_and_clear_faults(dev); > + > /* First fill our portion of the GTT with scratch pages */ > dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, > dev_priv->gtt.base.start / PAGE_SIZE, > - dev_priv->gtt.base.total / PAGE_SIZE); > + dev_priv->gtt.base.total / PAGE_SIZE, > + true); > > list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { > i915_gem_clflush_object(obj, obj->pin_display); > @@ -565,7 +617,8 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, > > static void gen6_ggtt_clear_range(struct i915_address_space *vm, > unsigned int first_entry, > - unsigned int num_entries) > + unsigned int num_entries, > + bool use_scratch) > { > struct drm_i915_private *dev_priv = vm->dev->dev_private; > gen6_gtt_pte_t scratch_pte, __iomem *gtt_base = > @@ -578,7 +631,8 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, > first_entry, num_entries, max_entries)) > num_entries = max_entries; > > - scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); > + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch); > + > for (i = 0; i < num_entries; i++) > iowrite32(scratch_pte, >t_base[i]); > readl(gtt_base); > @@ -599,7 +653,8 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm, > > static void i915_ggtt_clear_range(struct i915_address_space *vm, > unsigned int first_entry, > - unsigned int num_entries) > + unsigned int num_entries, > + bool unused) > { > intel_gtt_clear_range(first_entry, num_entries); > } > @@ -627,7 +682,8 @@ void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) > > dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, > entry, > - obj->base.size >> PAGE_SHIFT); > + obj->base.size >> PAGE_SHIFT, > + true); > > obj->has_global_gtt_mapping = 0; > } > @@ -714,11 +770,11 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, > const unsigned long count = (hole_end - hole_start) / PAGE_SIZE; > DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", > hole_start, hole_end); > - ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count); > + ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count, true); > } > > /* And finally clear the reserved guard page */ > - ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1); > + ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true); > } > > static bool > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 13153c3..f98f584 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -656,6 +656,10 @@ > #define ARB_MODE_SWIZZLE_IVB (1<<5) > #define RENDER_HWS_PGA_GEN7 (0x04080) > #define RING_FAULT_REG(ring) (0x4094 + 0x100*(ring)->id) > +#define RING_FAULT_GTTSEL_MASK (1<<11) > +#define RING_FAULT_SRCID(x) ((x >> 3) & 0xff) > +#define RING_FAULT_FAULT_TYPE(x) ((x >> 1) & 0x3) > +#define RING_FAULT_VALID (1<<0) > #define DONE_REG 0x40b0 > #define BSD_HWS_PGA_GEN7 (0x04180) > #define BLT_HWS_PGA_GEN7 (0x04280)