All of lore.kernel.org
 help / color / mirror / Atom feed
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
To: Riana Tauro <riana.tauro@intel.com>
Cc: <intel-xe@lists.freedesktop.org>, <anshuman.gupta@intel.com>,
	<badal.nilawar@intel.com>
Subject: Re: [PATCH v2 2/2] drm/xe/xe_survivability: Add support for survivability mode v2
Date: Mon, 1 Dec 2025 12:43:53 -0500	[thread overview]
Message-ID: <aS3T2crA95cvORV0@intel.com> (raw)
In-Reply-To: <20251201143420.3158372-6-riana.tauro@intel.com>

On Mon, Dec 01, 2025 at 08:04:23PM +0530, Riana Tauro wrote:
> v2 survivability breadcrumbs introduce a new mode called
> SPI Flash Descriptor Override (FDO) mode. This is enabled by
> PCODE when MEI itself fails and firmware cannot be updated via
> MEI using igsc. This mode provides the ability to update
> the firmware directly via the SPI driver.
> 
> Xe KMD initializes the nvm aux driver if FDO mode is enabled.
> 
> Userspace should check FDO mode entry in survivability info sysfs before
> using the SPI driver to update firmware.
> 
> 	/sys/bus/pci/devices/<device>/survivability_info/fdo_mode
> 
> v2 also supports survivability mode for critical boot errors.
> 
> Signed-off-by: Riana Tauro <riana.tauro@intel.com>
> ---
> v2: do not exit survivability mode in case of mei init failure (Rodrigo)

I still see the goto err there; what am I missing?
I mean, I have no strong feeling about what is right or wrong to do here;
I just want to make sure the code matches the doc and expectations...

> ---
>  drivers/gpu/drm/xe/xe_pcode_api.h             |  2 +
>  drivers/gpu/drm/xe/xe_survivability_mode.c    | 44 ++++++++++++++++---
>  .../gpu/drm/xe/xe_survivability_mode_types.h  |  6 +++
>  3 files changed, 46 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
> index 70dcd6625680..975892d6b230 100644
> --- a/drivers/gpu/drm/xe/xe_pcode_api.h
> +++ b/drivers/gpu/drm/xe/xe_pcode_api.h
> @@ -77,11 +77,13 @@
>  
>  #define PCODE_SCRATCH(x)		XE_REG(0x138320 + ((x) * 4))
>  /* PCODE_SCRATCH0 */
> +#define   BREADCRUMB_VERSION		REG_GENMASK(31, 29)
>  #define   AUXINFO_REG_OFFSET		REG_GENMASK(17, 15)
>  #define   OVERFLOW_REG_OFFSET		REG_GENMASK(14, 12)
>  #define   HISTORY_TRACKING		REG_BIT(11)
>  #define   OVERFLOW_SUPPORT		REG_BIT(10)
>  #define   AUXINFO_SUPPORT		REG_BIT(9)
> +#define   FDO_MODE			REG_BIT(4)
>  #define   BOOT_STATUS			REG_GENMASK(3, 1)
>  #define      CRITICAL_FAILURE		4
>  #define      NON_CRITICAL_FAILURE	7
> diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c
> index b5b582442637..8d409bc9c14b 100644
> --- a/drivers/gpu/drm/xe/xe_survivability_mode.c
> +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c
> @@ -16,6 +16,7 @@
>  #include "xe_heci_gsc.h"
>  #include "xe_i2c.h"
>  #include "xe_mmio.h"
> +#include "xe_nvm.h"
>  #include "xe_pcode_api.h"
>  #include "xe_vsec.h"
>  
> @@ -66,6 +67,11 @@
>   *
>   * - ``aux_info<n>`` : Some failures have additional debug information
>   *
> + * - ``fdo_mode`` : To allow recovery in scenarios where MEI itself fails, a new SPI Flash
> + *   Descriptor Override (FDO) mode is added in v2 survivability breadcrumbs. This mode is enabled
> + *   by PCODE and provides the ability to directly update the firmware via SPI Driver without
> + *   any dependency on MEI. Xe KMD initializes the nvm aux driver if FDO mode is enabled.
> + *
>   * Runtime Survivability
>   * =====================
>   *
> @@ -94,6 +100,8 @@ static const char * const reg_map[] = {
>  	[AUX_INFO4]               = "Auxiliary Info 4",
>  };
>  
> +#define FDO_INFO	(MAX_SCRATCH_REG + 1)
> +
>  struct xe_survivability_attribute {
>  	struct device_attribute attr;
>  	u8 index;
> @@ -127,6 +135,11 @@ static void populate_survivability_info(struct xe_device *xe)
>  	set_survivability_info(mmio, info, CAPABILITY_INFO);
>  	reg_value = info[CAPABILITY_INFO];
>  
> +	survivability->version = REG_FIELD_GET(BREADCRUMB_VERSION, reg_value);
> +	/* FDO mode is exposed only from version 2 */
> +	if (survivability->version >= 2)
> +		survivability->fdo_mode = REG_FIELD_GET(FDO_MODE, reg_value);
> +
>  	if (reg_value & HISTORY_TRACKING) {
>  		set_survivability_info(mmio, info, POSTCODE_TRACE);
>  
> @@ -188,6 +201,9 @@ static ssize_t survivability_info_show(struct device *dev,
>  	struct xe_survivability *survivability = &xe->survivability;
>  	u32 *info = survivability->info;
>  
> +	if (sa->index == FDO_INFO)
> +		return sysfs_emit(buff, "%s\n", str_enabled_disabled(survivability->fdo_mode));
> +
>  	return sysfs_emit(buff, "0x%x\n", info[sa->index]);
>  }
>  
> @@ -205,13 +221,18 @@ SURVIVABILITY_ATTR_RO(aux_info1, AUX_INFO1);
>  SURVIVABILITY_ATTR_RO(aux_info2, AUX_INFO2);
>  SURVIVABILITY_ATTR_RO(aux_info3, AUX_INFO3);
>  SURVIVABILITY_ATTR_RO(aux_info4, AUX_INFO4);
> +SURVIVABILITY_ATTR_RO(fdo_mode, FDO_INFO);
>  
>  static void xe_survivability_mode_fini(void *arg)
>  {
>  	struct xe_device *xe = arg;
> +	struct xe_survivability *survivability = &xe->survivability;
>  	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
>  	struct device *dev = &pdev->dev;
>  
> +	if (survivability->fdo_mode)
> +		xe_nvm_fini(xe);
> +
>  	device_remove_file(dev, &dev_attr_survivability_mode);
>  }
>  
> @@ -222,7 +243,10 @@ static umode_t survivability_info_attrs_visible(struct kobject *kobj, struct att
>  	struct xe_survivability *survivability = &xe->survivability;
>  	u32 *info = survivability->info;
>  
> -	if (info[idx])
> +	/* FDO mode is visible only when supported */
> +	if (idx >= MAX_SCRATCH_REG && survivability->version >= 2)
> +		return 0400;
> +	else if (info[idx])
>  		return 0400;
>  
>  	return 0;
> @@ -238,6 +262,7 @@ static struct attribute *survivability_info_attrs[] = {
>  	&attr_aux_info2.attr.attr,
>  	&attr_aux_info3.attr.attr,
>  	&attr_aux_info4.attr.attr,
> +	&attr_fdo_mode.attr.attr,
>  	NULL,
>  };
>  
> @@ -287,12 +312,16 @@ static int enable_boot_survivability_mode(struct pci_dev *pdev)
>  	/* Make sure xe_heci_gsc_init() knows about survivability mode */
>  	survivability->mode = true;
>  
> -	ret = xe_heci_gsc_init(xe);
> -	if (ret)
> -		goto err;
> +	xe_heci_gsc_init(xe);
>  
>  	xe_vsec_init(xe);
>  
> +	if (survivability->fdo_mode) {
> +		ret = xe_nvm_init(xe);
> +		if (ret)
> +			goto err;
> +	}
> +
>  	ret = xe_i2c_probe(xe);
>  	if (ret)
>  		goto err;
> @@ -302,6 +331,7 @@ static int enable_boot_survivability_mode(struct pci_dev *pdev)
>  	return 0;
>  
>  err:
> +	dev_err(dev, "Failed to enable Survivability Mode\n");
>  	survivability->mode = false;
>  	return ret;
>  }
> @@ -408,8 +438,10 @@ int xe_survivability_mode_boot_enable(struct xe_device *xe)
>  
>  	populate_survivability_info(xe);
>  
> -	/* Log breadcrumbs but do not enter survivability mode for Critical boot errors */
> -	if (survivability->boot_status == CRITICAL_FAILURE) {
> +	/*
> +	 * v2 supports survivability mode for critical errors
> +	 */
> +	if (survivability->version < 2  && survivability->boot_status == CRITICAL_FAILURE) {
>  		log_survivability_info(pdev);
>  		return -ENXIO;
>  	}
> diff --git a/drivers/gpu/drm/xe/xe_survivability_mode_types.h b/drivers/gpu/drm/xe/xe_survivability_mode_types.h
> index f31b3907d933..bd5dc1c955ff 100644
> --- a/drivers/gpu/drm/xe/xe_survivability_mode_types.h
> +++ b/drivers/gpu/drm/xe/xe_survivability_mode_types.h
> @@ -44,6 +44,12 @@ struct xe_survivability {
>  
>  	/** @type: survivability type */
>  	enum xe_survivability_type type;
> +
> +	/** @fdo_mode: indicates if FDO mode is enabled */
> +	bool fdo_mode;
> +
> +	/** @version: breadcrumb version of survivability mode  */
> +	u8 version;
>  };
>  
>  #endif /* _XE_SURVIVABILITY_MODE_TYPES_H_ */
> -- 
> 2.47.1
> 

  reply	other threads:[~2025-12-01 17:44 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-01 14:34 [PATCH v2 0/2] Redesign survivability mode Riana Tauro
2025-12-01 14:34 ` [PATCH v2 1/2] drm/xe/xe_survivability: " Riana Tauro
2025-12-01 17:48   ` Rodrigo Vivi
2025-12-02  3:10     ` Riana Tauro
2025-12-01 14:34 ` [PATCH v2 2/2] drm/xe/xe_survivability: Add support for survivability mode v2 Riana Tauro
2025-12-01 17:43   ` Rodrigo Vivi [this message]
2025-12-02  3:07     ` Riana Tauro
2025-12-02 20:31       ` Rodrigo Vivi
2025-12-01 17:05 ` ✓ CI.KUnit: success for Redesign survivability mode (rev2) Patchwork
2025-12-01 17:45 ` ✓ Xe.CI.BAT: " Patchwork
2025-12-01 20:12 ` ✗ Xe.CI.Full: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aS3T2crA95cvORV0@intel.com \
    --to=rodrigo.vivi@intel.com \
    --cc=anshuman.gupta@intel.com \
    --cc=badal.nilawar@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=riana.tauro@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.