Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Raag Jadav <raag.jadav@intel.com>
To: lucas.demarchi@intel.com, rodrigo.vivi@intel.com
Cc: intel-xe@lists.freedesktop.org, anshuman.gupta@intel.com,
	badal.nilawar@intel.com, riana.tauro@intel.com,
	Raag Jadav <raag.jadav@intel.com>
Subject: [PATCH v1 1/2] drm/xe/debugfs: Expose PCIe Gen5 update telemetry
Date: Mon, 31 Mar 2025 19:53:35 +0530	[thread overview]
Message-ID: <20250331142336.640226-2-raag.jadav@intel.com> (raw)
In-Reply-To: <20250331142336.640226-1-raag.jadav@intel.com>

Expose the debugfs telemetry required for PCIe Gen5 firmware updates on
discrete GPUs.

Signed-off-by: Raag Jadav <raag.jadav@intel.com>
---
 drivers/gpu/drm/xe/xe_debugfs.c   | 93 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_pcode_api.h |  4 ++
 2 files changed, 97 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index d0503959a8ed..67c941abf4fe 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -17,6 +17,9 @@
 #include "xe_gt_debugfs.h"
 #include "xe_gt_printk.h"
 #include "xe_guc_ads.h"
+#include "xe_mmio.h"
+#include "xe_pcode_api.h"
+#include "xe_pcode.h"
 #include "xe_pm.h"
 #include "xe_pxp_debugfs.h"
 #include "xe_sriov.h"
@@ -191,6 +194,89 @@ static const struct file_operations wedged_mode_fops = {
 	.write = wedged_mode_set,
 };
 
+/**
+ * DOC: PCIe Gen5 Update Limitations
+ *
+ * Default link speed of discrete GPUs is determined by FIT parameters stored
+ * in their flash memory, which are subject to override through user initiated
+ * firmware updates. It has been observed that devices configured with PCIe
+ * Gen5 as their default speed can come across link quality issues due to host
+ * or motherboard limitations and may have to auto-downspeed to PCIe Gen4 when
+ * faced with an unstable link at Gen5. Users are required to ensure that the
+ * device is capable of auto-downspeeding to PCIe Gen4 before pushing the image
+ * with Gen5 as default configuration. This can be done by reading
+ * ``pcie_gen4_downspeed_capable`` debugfs entry, which will denote PCIe Gen4
+ * auto-downspeed capability of the device with boolean output value of ``0``
+ * or ``1``, meaning `incapable` or `capable` respectively.
+ *
+ * .. code-block:: shell
+ *
+ *    $ cat /sys/kernel/debug/dri/<N>/pcie_gen4_downspeed_capable
+ *
+ * Pushing a PCIe Gen5 update to an auto-downspeed incapable device that then
+ * faces link instability due to host or motherboard limitations can result in
+ * the driver being unable to bind to the device, making further firmware
+ * updates impossible and leaving RMA as the last resort.
+ *
+ * Link downspeed status of auto-downspeed capable devices is available through
+ * ``pcie_gen4_downspeed_status`` debugfs entry with boolean output value of
+ * ``0`` or ``1``, with ``0`` meaning no downspeeding was required during link
+ * training (which is the optimal scenario) and ``1`` meaning the device has
+ * downsped to PCIe Gen4 due to an unstable Gen5 link.
+ *
+ * .. code-block:: shell
+ *
+ *    $ cat /sys/kernel/debug/dri/<N>/pcie_gen4_downspeed_status
+ */
+
+static ssize_t pcie_gen4_downspeed_capable_show(struct file *f, char __user *ubuf,
+						size_t size, loff_t *pos)
+{
+	struct xe_device *xe = file_inode(f)->i_private;
+	struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+	char buf[16];
+	u32 len, val;
+
+	xe_pm_runtime_get(xe);
+	val = xe_mmio_read32(mmio, PCODE_SCRATCH(16));
+	xe_pm_runtime_put(xe);
+
+	len = scnprintf(buf, sizeof(buf), "%u\n",
+			REG_FIELD_GET(PCIE_GEN4_DOWNGRADE, val) == DOWNGRADE_CAPABLE ? 1 : 0);
+
+	return simple_read_from_buffer(ubuf, size, pos, buf, len);
+}
+
+static const struct file_operations pcie_gen4_downspeed_capable_fops = {
+	.owner = THIS_MODULE,
+	.read = pcie_gen4_downspeed_capable_show,
+};
+
+static ssize_t pcie_gen4_downspeed_status_show(struct file *f, char __user *ubuf,
+					       size_t size, loff_t *pos)
+{
+	struct xe_device *xe = file_inode(f)->i_private;
+	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
+	char buf[16];
+	u32 len, val;
+	int ret;
+
+	xe_pm_runtime_get(xe);
+	ret = xe_pcode_read(root_tile, PCODE_MBOX(DGFX_PCODE_STATUS,
+			    DGFX_GET_INIT_STATUS, 0), &val, NULL);
+	xe_pm_runtime_put(xe);
+	if (ret)
+		return ret;
+
+	len = scnprintf(buf, sizeof(buf), "%u\n", REG_FIELD_GET(REG_BIT(31), val));
+	return simple_read_from_buffer(ubuf, size, pos, buf, len);
+}
+
+static const struct file_operations pcie_gen4_downspeed_status_fops = {
+	.owner = THIS_MODULE,
+	.read = pcie_gen4_downspeed_status_show,
+};
+
 void xe_debugfs_register(struct xe_device *xe)
 {
 	struct ttm_device *bdev = &xe->ttm;
@@ -211,6 +297,13 @@ void xe_debugfs_register(struct xe_device *xe)
 	debugfs_create_file("wedged_mode", 0600, root, xe,
 			    &wedged_mode_fops);
 
+	if (IS_DGFX(xe)) {
+		debugfs_create_file("pcie_gen4_downspeed_capable", 0400, root, xe,
+				    &pcie_gen4_downspeed_capable_fops);
+		debugfs_create_file("pcie_gen4_downspeed_status", 0400, root, xe,
+				    &pcie_gen4_downspeed_status_fops);
+	}
+
 	for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
 		man = ttm_manager_type(bdev, mem_type);
 
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
index e622ae17f08d..1f802d9793ad 100644
--- a/drivers/gpu/drm/xe/xe_pcode_api.h
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -66,6 +66,10 @@
 /* Auxiliary info bits */
 #define   AUXINFO_HISTORY_OFFSET	REG_GENMASK(31, 29)
 
+/* PCIe Gen4 downgrade capability bits */
+#define   PCIE_GEN4_DOWNGRADE		REG_GENMASK(1, 0)
+#define      DOWNGRADE_CAPABLE		2
+
 struct pcode_err_decode {
 	int errno;
 	const char *str;
-- 
2.34.1


  reply	other threads:[~2025-03-31 14:24 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-31 14:23 [PATCH v1 0/2] DGFX PCIe Gen5 update telemetry and usage Raag Jadav
2025-03-31 14:23 ` Raag Jadav [this message]
2025-03-31 14:52   ` [PATCH v1 1/2] drm/xe/debugfs: Expose PCIe Gen5 update telemetry Lucas De Marchi
2025-03-31 15:23     ` Rodrigo Vivi
2025-03-31 15:15   ` Rodrigo Vivi
2025-04-02 18:24   ` Nilawar, Badal
2025-04-03  3:38     ` Raag Jadav
2025-03-31 14:23 ` [PATCH v1 2/2] drm/xe/doc: Wire up PCIe Gen5 update limitations Raag Jadav
2025-03-31 15:24   ` Rodrigo Vivi
2025-04-02 10:22     ` Raag Jadav
2025-03-31 14:29 ` ✓ CI.Patch_applied: success for DGFX PCIe Gen5 update telemetry and usage Patchwork
2025-03-31 14:30 ` ✗ CI.checkpatch: warning " Patchwork
2025-03-31 14:31 ` ✓ CI.KUnit: success " Patchwork
2025-03-31 14:47 ` ✓ CI.Build: " Patchwork
2025-03-31 14:50 ` ✓ CI.Hooks: " Patchwork
2025-03-31 14:51 ` ✓ CI.checksparse: " Patchwork
2025-03-31 15:37 ` ✓ Xe.CI.BAT: " Patchwork
2025-03-31 16:54 ` ✗ Xe.CI.Full: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250331142336.640226-2-raag.jadav@intel.com \
    --to=raag.jadav@intel.com \
    --cc=anshuman.gupta@intel.com \
    --cc=badal.nilawar@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=lucas.demarchi@intel.com \
    --cc=riana.tauro@intel.com \
    --cc=rodrigo.vivi@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox