All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yu Dai <yu.dai@intel.com>
To: Arun Siluvery <arun.siluvery@linux.intel.com>,
	intel-gfx@lists.freedesktop.org
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Subject: Re: [PATCH v2 2/2] drm/i915/guc: Reset GuC and retry on firmware load failure
Date: Thu, 10 Mar 2016 09:57:46 -0800	[thread overview]
Message-ID: <56E1B59A.7050208@intel.com> (raw)
In-Reply-To: <1457437098-21689-3-git-send-email-arun.siluvery@linux.intel.com>

LGTM. Reviewed-by: Alex Dai <yu.dai@intel.com>

On 03/08/2016 03:38 AM, Arun Siluvery wrote:
> Due to timing issues in the HW some of the status bits required for GuC
> authentication doesn't get set occassionally, when that happens, GuC cannot
> be initialized and we will be left with a wedged GPU. The WA suggested is
> to perform a soft reset of GuC and attempt to reload the fw again for few
> times before giving up.
>
> As the failure is dependent on timing, tests performed by triggering manual
> full gpu reset (i915_wedged) showed that we could sometimes hit this after
> several thousand iterations but sometimes tests ran even longer without any
> issues. Reset and reload mechanism proved helpful when we indeed hit fw
> load failure so it is better to include this to improve driver stability.
>
> This change implements the following WA,
>
> WaEnableuKernelHeaderValidFix:skl,bxt
> WaEnableGuCBootHashCheckNotSet:skl,bxt
>
> Cc: Dave Gordon <david.s.gordon@intel.com>
> Cc: Alex Dai <yu.dai@intel.com>
> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h         |  1 +
>   drivers/gpu/drm/i915/i915_guc_reg.h     |  1 +
>   drivers/gpu/drm/i915/i915_reg.h         |  1 +
>   drivers/gpu/drm/i915/intel_guc_loader.c | 49 +++++++++++++++++++++++++++++++--
>   drivers/gpu/drm/i915/intel_uncore.c     | 19 +++++++++++++
>   5 files changed, 69 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f37ac12..0df7c82 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2757,6 +2757,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
>   extern int intel_gpu_reset(struct drm_device *dev);
>   extern bool intel_has_gpu_reset(struct drm_device *dev);
>   extern int i915_reset(struct drm_device *dev);
> +extern int intel_guc_reset(struct drm_i915_private *dev_priv);
>   extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
>   extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
>   extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h
> index e4ba582..94ceee5 100644
> --- a/drivers/gpu/drm/i915/i915_guc_reg.h
> +++ b/drivers/gpu/drm/i915/i915_guc_reg.h
> @@ -27,6 +27,7 @@
>   /* Definitions of GuC H/W registers, bits, etc */
>   
>   #define GUC_STATUS			_MMIO(0xc000)
> +#define   GS_MIA_IN_RESET		(1 << 0)
>   #define   GS_BOOTROM_SHIFT		1
>   #define   GS_BOOTROM_MASK		  (0x7F << GS_BOOTROM_SHIFT)
>   #define   GS_BOOTROM_RSA_FAILED		  (0x50 << GS_BOOTROM_SHIFT)
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 7dfc400..48a23de 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -164,6 +164,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>   #define  GEN6_GRDOM_RENDER		(1 << 1)
>   #define  GEN6_GRDOM_MEDIA		(1 << 2)
>   #define  GEN6_GRDOM_BLT			(1 << 3)
> +#define  GEN9_GRDOM_GUC			(1 << 5)
>   
>   #define RING_PP_DIR_BASE(ring)		_MMIO((ring)->mmio_base+0x228)
>   #define RING_PP_DIR_BASE_READ(ring)	_MMIO((ring)->mmio_base+0x518)
> diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
> index 82a3c03..f9cb814 100644
> --- a/drivers/gpu/drm/i915/intel_guc_loader.c
> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
> @@ -353,6 +353,24 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
>   	return ret;
>   }
>   
> +static int i915_reset_guc(struct drm_i915_private *dev_priv)
> +{
> +	int ret;
> +	u32 guc_status;
> +
> +	ret = intel_guc_reset(dev_priv);
> +	if (ret) {
> +		DRM_ERROR("GuC reset failed, ret = %d\n", ret);
> +		return ret;
> +	}
> +
> +	guc_status = I915_READ(GUC_STATUS);
> +	WARN(!(guc_status & GS_MIA_IN_RESET),
> +	     "GuC status: 0x%x, MIA core expected to be in reset\n", guc_status);
> +
> +	return ret;
> +}
> +
>   /**
>    * intel_guc_ucode_load() - load GuC uCode into the device
>    * @dev:	drm device
> @@ -417,9 +435,36 @@ int intel_guc_ucode_load(struct drm_device *dev)
>   	if (err)
>   		goto fail;
>   
> +	/*
> +	 * WaEnableuKernelHeaderValidFix:skl,bxt
> +	 * For BXT, this is only upto B0 but below WA is required for later
> +	 * steppings also so this is extended as well.
> +	 */
> +	/* WaEnableGuCBootHashCheckNotSet:skl,bxt */
>   	err = guc_ucode_xfer(dev_priv);
> -	if (err)
> -		goto fail;
> +	if (err) {
> +		int retries = 3;
> +
> +		DRM_ERROR("GuC fw load failed, err=%d, attempting reset and retry\n", err);
> +
> +		while (retries--) {
> +			err = i915_reset_guc(dev_priv);
> +			if (err)
> +				break;
> +
> +			err = guc_ucode_xfer(dev_priv);
> +			if (!err) {
> +				DRM_DEBUG_DRIVER("GuC fw reload succeeded after reset\n");
> +				break;
> +			}
> +			DRM_DEBUG_DRIVER("GuC fw reload retries left: %d\n", retries);
> +		}
> +
> +		if (err) {
> +			DRM_ERROR("GuC fw reload attempt failed, ret=%d\n", err);
> +			goto fail;
> +		}
> +	}
>   
>   	guc_fw->guc_fw_load_status = GUC_FIRMWARE_SUCCESS;
>   
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 80e38d5..92be76a 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1647,6 +1647,25 @@ bool intel_has_gpu_reset(struct drm_device *dev)
>   	return intel_get_gpu_reset(dev) != NULL;
>   }
>   
> +int intel_guc_reset(struct drm_i915_private *dev_priv)
> +{
> +	int ret;
> +	unsigned long irqflags;
> +
> +	if (!i915.enable_guc_submission)
> +		return -EINVAL;
> +
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
> +
> +	ret = gen6_domain_reset(dev_priv, GEN9_GRDOM_GUC);
> +
> +	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +
> +	return ret;
> +}
> +
>   bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv)
>   {
>   	return check_for_unclaimed_mmio(dev_priv);

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2016-03-10 18:08 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-08 11:38 [PATCH v2 0/2] Reset GuC and retry on fw load failure Arun Siluvery
2016-03-08 11:38 ` [PATCH v2 1/2] drm/i915: Add function to reset an engine domain Arun Siluvery
2016-03-08 11:38 ` [PATCH v2 2/2] drm/i915/guc: Reset GuC and retry on firmware load failure Arun Siluvery
2016-03-10 17:57   ` Yu Dai [this message]
2016-03-11 11:03     ` Arun Siluvery
2016-03-08 11:55 ` ✗ Fi.CI.BAT: warning for Reset GuC and retry on fw load failure (rev2) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56E1B59A.7050208@intel.com \
    --to=yu.dai@intel.com \
    --cc=arun.siluvery@linux.intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=mika.kuoppala@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.