From: Tejas Upadhyay <tejas.upadhyay@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: matthew.auld@intel.com, matthew.brost@intel.com,
himal.prasad.ghimiray@intel.com,
Tejas Upadhyay <tejas.upadhyay@intel.com>
Subject: [RFC PATCH 1/5] drm/xe: Implement VRAM object tracking ability using physical address
Date: Wed, 11 Feb 2026 10:31:34 +0530 [thread overview]
Message-ID: <20260211050132.1332599-8-tejas.upadhyay@intel.com> (raw)
In-Reply-To: <20260211050132.1332599-7-tejas.upadhyay@intel.com>
Implement the capability to track and identify TTM buffer objects
using a specific faulty memory address in VRAM. This functionality
is critical for supporting the memory page offline feature on CRI,
where identified faulty pages must be traced back to their
originating buffer for safe removal.
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
---
drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 75 ++++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_ttm_vram_mgr.h | 2 +-
2 files changed, 76 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index d6aa61e55f4d..4e852eed5170 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -56,6 +56,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
+	struct drm_buddy_block *block;
 	u64 size, min_page_size;
 	unsigned long lpfn;
 	int err;
lpfn = place->lpfn;
if (!lpfn || lpfn > man->size >> PAGE_SHIFT)
@@ -137,6 +138,8 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
}
mgr->visible_avail -= vres->used_visible_size;
+ list_for_each_entry(block, &vres->blocks, link)
+ block->private = tbo;
mutex_unlock(&mgr->lock);
if (!(vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) &&
@@ -467,3 +470,75 @@ u64 xe_ttm_vram_get_avail(struct ttm_resource_manager *man)
return avail;
}
+
+/* True if the inclusive ranges [s1, e1] and [s2, e2] intersect. */
+static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+	return s1 <= e2 && e1 >= s2;
+}
+
+/*
+ * True if the inclusive range [s2, e2] lies entirely within [s1, e1].
+ * Same semantics as the contains() helper in drm_buddy.c; the previous
+ * "e1 <= e2" comparison was inverted and did not test containment.
+ */
+static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+	return s1 <= s2 && e1 >= e2;
+}
+
+/**
+ * xe_ttm_vram_addr_to_tbo() - Resolve a VRAM offset to its backing TTM BO
+ * @mm: buddy allocator covering the VRAM region
+ * @start: faulty VRAM offset; looked up as one 4K page ([start, start + 4K - 1])
+ *
+ * Depth-first walk of the buddy tree from the roots, descending only into
+ * split blocks that overlap the faulty page, until an unsplit block
+ * covering the page is found.
+ *
+ * Locking: assumes the caller serializes against allocation/free of buddy
+ * blocks - presumably mgr->lock, as taken in xe_ttm_vram_mgr_new(); TODO
+ * confirm at the call sites.
+ *
+ * Return: the ttm_buffer_object whose allocation covers @start, or NULL if
+ * the page is free or could not be resolved.
+ */
+static struct ttm_buffer_object *xe_ttm_vram_addr_to_tbo(struct drm_buddy *mm, u64 start)
+{
+	struct drm_buddy_block *block;
+	u64 end;
+	LIST_HEAD(dfs);
+	int i;
+
+	/* Fault granularity is a single 4K page. */
+	end = start + SZ_4K - 1;
+	for (i = 0; i < mm->n_roots; ++i)
+		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
+
+	do {
+		u64 block_start;
+		u64 block_end;
+
+		block = list_first_entry_or_null(&dfs,
+						 struct drm_buddy_block,
+						 tmp_link);
+		if (!block)
+			break;
+
+		list_del(&block->tmp_link);
+
+		block_start = drm_buddy_block_offset(block);
+		block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+
+		/* Prune subtrees that cannot contain the faulty page. */
+		if (!overlaps(start, end, block_start, block_end))
+			continue;
+
+		if (contains(start, end, block_start, block_end) &&
+		    !drm_buddy_block_is_split(block)) {
+			if (drm_buddy_block_is_free(block)) {
+				/* Page is unallocated: nothing to offline. */
+				return NULL;
+			} else if (drm_buddy_block_is_allocated(block) && !mm->clear_avail) {
+				/*
+				 * ->private is set to the owning BO by
+				 * xe_ttm_vram_mgr_new(); an allocated block
+				 * without it indicates a tracking bug.
+				 * NOTE(review): unclear why the global
+				 * mm->clear_avail gates this lookup - confirm
+				 * intent with the author.
+				 */
+				struct ttm_buffer_object *tbo = block->private;
+
+				WARN_ON(!tbo);
+				return tbo;
+			}
+		}
+
+		/* Descend into both halves of a split block (left first). */
+		if (drm_buddy_block_is_split(block)) {
+			list_add(&block->right->tmp_link, &dfs);
+			list_add(&block->left->tmp_link, &dfs);
+		}
+	} while (1);
+
+	return NULL;
+}
+
+/**
+ * xe_ttm_tbo_handle_addr_fault() - Resolve a faulty VRAM address to its BO
+ * @tile: tile whose VRAM manager owns @addr
+ * @addr: faulty VRAM offset reported by hardware
+ *
+ * Looks up the buffer object backing @addr so the faulty page can later be
+ * offlined. The buddy tree is walked via a pointer to the live allocator
+ * state - the previous by-value copy of struct drm_buddy duplicated its
+ * internal list heads and root pointers - and under the manager lock so the
+ * tree cannot mutate underneath the walk.
+ *
+ * NOTE(review): EXPORT_SYMBOL is only needed if this is called from outside
+ * xe.ko - confirm against the later patches in the series.
+ *
+ * Return: 0 if @addr was resolved (or the page is free), -ENOENT if no
+ * backing object could be found.
+ */
+int xe_ttm_tbo_handle_addr_fault(struct xe_tile *tile, unsigned long addr)
+{
+	struct xe_ttm_vram_mgr *vram_mgr = &tile->mem.vram->ttm;
+	struct ttm_buffer_object *tbo;
+
+	mutex_lock(&vram_mgr->lock);
+	tbo = xe_ttm_vram_addr_to_tbo(&vram_mgr->mm, addr);
+	mutex_unlock(&vram_mgr->lock);
+
+	if (!tbo)
+		return -ENOENT;
+
+	/* TODO: hand @tbo to the page-offlining machinery (later patches). */
+	return 0;
+}
+EXPORT_SYMBOL(xe_ttm_tbo_handle_addr_fault);
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
index 87b7fae5edba..1d6075411ebf 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
@@ -30,7 +30,8 @@ u64 xe_ttm_vram_get_avail(struct ttm_resource_manager *man);
 u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man);
 void xe_ttm_vram_get_used(struct ttm_resource_manager *man,
			   u64 *used, u64 *used_visible);
+int xe_ttm_tbo_handle_addr_fault(struct xe_tile *tile, unsigned long addr);
 
static inline struct xe_ttm_vram_mgr_resource *
to_xe_ttm_vram_mgr_resource(struct ttm_resource *res)
{
--
2.52.0
next prev parent reply other threads:[~2026-02-11 5:02 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-11 5:01 [RFC PATCH 0/5] Add memory page offlining support Tejas Upadhyay
2026-02-11 5:01 ` Tejas Upadhyay [this message]
2026-02-11 6:26 ` [RFC PATCH 1/5] drm/xe: Implement VRAM object tracking ability using physical address Matthew Brost
2026-02-12 4:49 ` Upadhyay, Tejas
2026-02-11 5:01 ` [RFC PATCH 2/5] drm/xe: Handle physical memory address error Tejas Upadhyay
2026-02-11 5:01 ` [RFC PATCH 3/5] [DO_NOT_REVIEW]drm/xe/cri: Add debugfs to inject faulty vram address Tejas Upadhyay
2026-02-11 5:01 ` [RFC PATCH 4/5] drm/xe: Add routine to dump allocated VRAM blocks Tejas Upadhyay
2026-02-11 5:01 ` [RFC PATCH 5/5] [DO NOT REVIEW]drm/xe/cri: Add sysfs interface for bad gpu vram pages Tejas Upadhyay
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260211050132.1332599-8-tejas.upadhyay@intel.com \
--to=tejas.upadhyay@intel.com \
--cc=himal.prasad.ghimiray@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=matthew.auld@intel.com \
--cc=matthew.brost@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox