Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: matthew.auld@intel.com, matthew.brost@intel.com,
	himal.prasad.ghimiray@intel.com,
	Tejas Upadhyay <tejas.upadhyay@intel.com>
Subject: [RFC PATCH 3/5] [DO_NOT_REVIEW]drm/xe/cri: Add debugfs to inject faulty vram address
Date: Wed, 11 Feb 2026 10:31:36 +0530	[thread overview]
Message-ID: <20260211050132.1332599-10-tejas.upadhyay@intel.com> (raw)
In-Reply-To: <20260211050132.1332599-7-tejas.upadhyay@intel.com>

Add debugfs which can help testing feature with manual error injection.
Adding a debugfs interface to the drm/xe driver allows manual injection
of faulty VRAM addresses, facilitating the testing of the CRI memory
page offline feature before it is fully functional. The implementation
involves creating a debugfs entry, likely under
/sys/kernel/debug/dri/bdf/invalid_addr_vram0,
to accept specific faulty addresses for validation.

For example,
echo 0x1000 > /sys/kernel/debug/dri/bdf/invalid_addr_vram0
where 0x1000 is faulty adress being injected.

Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
---
 drivers/gpu/drm/xe/xe_debugfs.c            | 49 ++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h |  2 +
 2 files changed, 51 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 844cfafe1ec7..d9dc1acebbce 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -27,6 +27,7 @@
 #include "xe_sriov_vf.h"
 #include "xe_step.h"
 #include "xe_tile_debugfs.h"
+#include "xe_ttm_vram_mgr.h"
 #include "xe_vsec.h"
 #include "xe_wa.h"
 
@@ -509,12 +510,48 @@ static const struct file_operations disable_late_binding_fops = {
 	.write = disable_late_binding_set,
 };
 
+static ssize_t addr_fault_reporting_show(struct file *f, char __user *ubuf,
+					 size_t size, loff_t *pos)
+{
+	struct xe_device *xe = file_inode(f)->i_private;
+	char buf[32];
+	int len;
+
+	len = scnprintf(buf, sizeof(buf), "%lld\n", xe->mem.vram->ttm.fault_addr);
+
+	return simple_read_from_buffer(ubuf, size, pos, buf, len);
+}
+
+static ssize_t addr_fault_reporting_set(struct file *f, const char __user *ubuf,
+					size_t size, loff_t *pos)
+{
+	struct xe_device *xe = file_inode(f)->i_private;
+	u64 addr;
+	int ret;
+
+	ret = kstrtou64_from_user(ubuf, size, 0, &addr);
+	if (ret)
+		return ret;
+
+	xe->mem.vram->ttm.fault_addr = addr;
+	xe_ttm_tbo_handle_addr_fault(xe_device_get_root_tile(xe), xe->mem.vram->ttm.fault_addr);
+
+	return size;
+}
+
+static const struct file_operations addr_fault_reporting_fops = {
+	.owner = THIS_MODULE,
+	.read = addr_fault_reporting_show,
+	.write = addr_fault_reporting_set,
+};
+
 void xe_debugfs_register(struct xe_device *xe)
 {
 	struct ttm_device *bdev = &xe->ttm;
 	struct drm_minor *minor = xe->drm.primary;
 	struct dentry *root = minor->debugfs_root;
 	struct ttm_resource_manager *man;
+	u8 mem_type = XE_PL_VRAM1;
 	struct xe_tile *tile;
 	struct xe_gt *gt;
 	u8 tile_id;
@@ -565,6 +602,18 @@ void xe_debugfs_register(struct xe_device *xe)
 	if (man)
 		ttm_resource_manager_create_debugfs(man, root, "stolen_mm");
 
+	do {
+		man = ttm_manager_type(bdev, mem_type);
+		if (man) {
+			char name[20];
+
+			snprintf(name, sizeof(name), "invalid_addr_vram%d", mem_type - XE_PL_VRAM0);
+			debugfs_create_file(name, 0600, root, xe,
+					    &addr_fault_reporting_fops);
+		}
+		--mem_type;
+	} while (mem_type >= XE_PL_VRAM0);
+
 	for_each_tile(tile, xe, tile_id)
 		xe_tile_debugfs_register(tile);
 
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
index 85511b51af75..c93573b9aab2 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -37,6 +37,8 @@ struct xe_ttm_vram_mgr {
 	struct mutex lock;
 	/** @mem_type: The TTM memory type */
 	u32 mem_type;
+	/** @fault_addr: debugfs hook for setting faulty address */
+	u64 fault_addr;
 };
 
 /**
-- 
2.52.0


  parent reply	other threads:[~2026-02-11  5:02 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-11  5:01 [RFC PATCH 0/5] Add memory page offlining support Tejas Upadhyay
2026-02-11  5:01 ` [RFC PATCH 1/5] drm/xe: Implement VRAM object tracking ability using physical address Tejas Upadhyay
2026-02-11  6:26   ` Matthew Brost
2026-02-12  4:49     ` Upadhyay, Tejas
2026-02-11  5:01 ` [RFC PATCH 2/5] drm/xe: Handle physical memory address error Tejas Upadhyay
2026-02-11  5:01 ` Tejas Upadhyay [this message]
2026-02-11  5:01 ` [RFC PATCH 4/5] drm/xe: Add routine to dump allocated VRAM blocks Tejas Upadhyay
2026-02-11  5:01 ` [RFC PATCH 5/5] [DO NOT REVIEW]drm/xe/cri: Add sysfs interface for bad gpu vram pages Tejas Upadhyay

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260211050132.1332599-10-tejas.upadhyay@intel.com \
    --to=tejas.upadhyay@intel.com \
    --cc=himal.prasad.ghimiray@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.auld@intel.com \
    --cc=matthew.brost@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox