* [PATCH v5 0/2] drm/xe/pagefault: Add SRCID to pagefault reporting @ 2026-06-23 16:46 Jonathan Cavitt 2026-06-23 16:46 ` [PATCH v5 1/2] drm/xe/pagefault: Add SRCID to pagefault struct Jonathan Cavitt 2026-06-23 16:46 ` [PATCH v5 2/2] drm/xe/vm: Add srcid to xe_vm_get_property_ioctl fault report Jonathan Cavitt 0 siblings, 2 replies; 4+ messages in thread From: Jonathan Cavitt @ 2026-06-23 16:46 UTC (permalink / raw) To: dri-devel Cc: saurabhg.gupta, alex.zuo, jonathan.cavitt, mripard, airlied, simona, linux-kernel, intel-xe, Rodrigo.vivi, matthew.brost, maarten.lankhorst, thomas.hellstrom, tzimmermann Add SRCID to the xe_pagefault struct, which reports the ID of the faulting hardware unit. This will be passed on to the xe_vm_get_property_ioctl for reading per-vm faults and will assist in diagnosing pagefaults. v2: - Readd pad check, as the pad in the ioctl struct was not changed (jcavitt) v3: - Rebase v4: - Squash SRCID with ASID to keep the struct compact (Matthew) v5: - Use BUILD_BUG_ON and move ASID definition in one function (Matthew) Jonathan Cavitt (2): drm/xe/pagefault: Add SRCID to pagefault struct drm/xe/vm: Add srcid to xe_vm_get_property_ioctl fault report drivers/gpu/drm/xe/xe_guc_pagefault.c | 8 +++++++- drivers/gpu/drm/xe/xe_pagefault.c | 23 +++++++++++++++-------- drivers/gpu/drm/xe/xe_pagefault_types.h | 9 +++++++-- drivers/gpu/drm/xe/xe_vm.c | 8 ++++++++ drivers/gpu/drm/xe/xe_vm_types.h | 2 ++ include/uapi/drm/xe_drm.h | 4 ++-- 6 files changed, 41 insertions(+), 13 deletions(-) -- 2.53.0 ^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v5 1/2] drm/xe/pagefault: Add SRCID to pagefault struct 2026-06-23 16:46 [PATCH v5 0/2] drm/xe/pagefault: Add SRCID to pagefault reporting Jonathan Cavitt @ 2026-06-23 16:46 ` Jonathan Cavitt 2026-06-24 2:55 ` Matthew Brost 2026-06-23 16:46 ` [PATCH v5 2/2] drm/xe/vm: Add srcid to xe_vm_get_property_ioctl fault report Jonathan Cavitt 1 sibling, 1 reply; 4+ messages in thread From: Jonathan Cavitt @ 2026-06-23 16:46 UTC (permalink / raw) To: dri-devel Cc: saurabhg.gupta, alex.zuo, jonathan.cavitt, mripard, airlied, simona, linux-kernel, intel-xe, Rodrigo.vivi, matthew.brost, maarten.lankhorst, thomas.hellstrom, tzimmermann Add SRCID information to pagefault struct for the purpose of reporting the hardware unit that resulted in the pagefault. v2: - Squash SRCID with ASID to keep the struct compact (Matthew) v3: - Use BUILD_BUG_ON and move ASID definition in one function (Matthew) Signed-off-by: Jonathan Cavitt <jonathan.cavitt@intel.com> Cc: Matthew Brost <matthew.brost@intel.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: David Airlie <airlied@gmail.com> Cc: Simona Vetter <simona@ffwll.ch> --- drivers/gpu/drm/xe/xe_guc_pagefault.c | 8 +++++++- drivers/gpu/drm/xe/xe_pagefault.c | 23 +++++++++++++++-------- drivers/gpu/drm/xe/xe_pagefault_types.h | 9 +++++++-- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.c b/drivers/gpu/drm/xe/xe_guc_pagefault.c index 607e32392f46..f36aa17b5b3e 100644 --- a/drivers/gpu/drm/xe/xe_guc_pagefault.c +++ b/drivers/gpu/drm/xe/xe_guc_pagefault.c @@ -78,7 +78,13 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) << PFD_VIRTUAL_ADDR_HI_SHIFT) | (FIELD_GET(PFD_VIRTUAL_ADDR_LO, msg[2]) << PFD_VIRTUAL_ADDR_LO_SHIFT); - pf.consumer.asid = FIELD_GET(PFD_ASID, msg[1]); + + BUILD_BUG_ON(XE_MAX_ASID > XE_PAGEFAULT_ASID_MASK); + + pf.consumer.id = FIELD_PREP(XE_PAGEFAULT_ASID_MASK, + FIELD_GET(PFD_ASID, msg[1])) 
| + FIELD_PREP(XE_PAGEFAULT_SRCID_MASK, + FIELD_GET(PFD_SRC_ID, msg[0])); pf.consumer.access_type = FIELD_GET(PFD_ACCESS_TYPE, msg[2]) | (FIELD_GET(PFD_PREFETCH, msg[2]) ? XE_PAGEFAULT_ACCESS_PREFETCH : 0); if (FIELD_GET(XE2_PFD_TRVA_FAULT, msg[0])) diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c index dd3c068e1a39..3b0510f2a72e 100644 --- a/drivers/gpu/drm/xe/xe_pagefault.c +++ b/drivers/gpu/drm/xe/xe_pagefault.c @@ -175,12 +175,13 @@ static int xe_pagefault_service(struct xe_pagefault *pf) struct xe_vma *vma = NULL; int err; bool atomic; + u32 asid = FIELD_GET(XE_PAGEFAULT_ASID_MASK, pf->consumer.id); /* Producer flagged this fault to be nacked */ if (pf->consumer.fault_type_level == XE_PAGEFAULT_TYPE_LEVEL_NACK) return -EFAULT; - vm = xe_pagefault_asid_to_vm(xe, pf->consumer.asid); + vm = xe_pagefault_asid_to_vm(xe, asid); if (IS_ERR(vm)) return PTR_ERR(vm); @@ -242,14 +243,16 @@ static bool xe_pagefault_queue_pop(struct xe_pagefault_queue *pf_queue, static void xe_pagefault_print(struct xe_pagefault *pf) { - xe_gt_info(pf->gt, "\n\tASID: %d\n" + xe_gt_info(pf->gt, "\n\tASID: %ld\n" "\tFaulted Address: 0x%08x%08x\n" "\tFaultType: %lu\n" "\tAccessType: %lu\n" "\tFaultLevel: %lu\n" "\tEngineClass: %d %s\n" - "\tEngineInstance: %d\n", - pf->consumer.asid, + "\tEngineInstance: %d\n" + "\tSRCID: 0x%02lx\n", + FIELD_GET(XE_PAGEFAULT_ASID_MASK, + pf->consumer.id), upper_32_bits(pf->consumer.page_addr), lower_32_bits(pf->consumer.page_addr), FIELD_GET(XE_PAGEFAULT_TYPE_MASK, @@ -260,7 +263,9 @@ static void xe_pagefault_print(struct xe_pagefault *pf) pf->consumer.fault_type_level), pf->consumer.engine_class, xe_hw_engine_class_to_str(pf->consumer.engine_class), - pf->consumer.engine_instance); + pf->consumer.engine_instance, + FIELD_GET(XE_PAGEFAULT_SRCID_MASK, + pf->consumer.id)); } static void xe_pagefault_save_to_vm(struct xe_device *xe, struct xe_pagefault *pf) @@ -273,7 +278,8 @@ static void xe_pagefault_save_to_vm(struct xe_device 
*xe, struct xe_pagefault *p * mode, return VM anyways. */ down_read(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, pf->consumer.asid); + vm = xa_load(&xe->usm.asid_to_vm, + FIELD_GET(XE_PAGEFAULT_ASID_MASK, pf->consumer.id)); if (vm) xe_vm_get(vm); else @@ -474,8 +480,9 @@ static bool xe_pagefault_queue_full(struct xe_pagefault_queue *pf_queue) */ int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf) { + u32 asid = FIELD_GET(XE_PAGEFAULT_ASID_MASK, pf->consumer.id); struct xe_pagefault_queue *pf_queue = xe->usm.pf_queue + - (pf->consumer.asid % XE_PAGEFAULT_QUEUE_COUNT); + (asid % XE_PAGEFAULT_QUEUE_COUNT); unsigned long flags; bool full; @@ -489,7 +496,7 @@ int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf) } else { drm_warn(&xe->drm, "PageFault Queue (%d) full, shouldn't be possible\n", - pf->consumer.asid % XE_PAGEFAULT_QUEUE_COUNT); + asid % XE_PAGEFAULT_QUEUE_COUNT); } spin_unlock_irqrestore(&pf_queue->lock, flags); diff --git a/drivers/gpu/drm/xe/xe_pagefault_types.h b/drivers/gpu/drm/xe/xe_pagefault_types.h index c4ee625b93dd..2213cea886c7 100644 --- a/drivers/gpu/drm/xe/xe_pagefault_types.h +++ b/drivers/gpu/drm/xe/xe_pagefault_types.h @@ -65,8 +65,13 @@ struct xe_pagefault { struct { /** @consumer.page_addr: address of page fault */ u64 page_addr; - /** @consumer.asid: address space ID */ - u32 asid; +#define XE_PAGEFAULT_ASID_MASK GENMASK(23, 0) +#define XE_PAGEFAULT_SRCID_MASK GENMASK(31, 24) + /** + * @consumer.id: address space ID and SRCID, folded into one + * to keep size compact + */ + u32 id; /** * @consumer.access_type: access type and prefetch flag packed * into a u8. -- 2.53.0 ^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v5 1/2] drm/xe/pagefault: Add SRCID to pagefault struct 2026-06-23 16:46 ` [PATCH v5 1/2] drm/xe/pagefault: Add SRCID to pagefault struct Jonathan Cavitt @ 2026-06-24 2:55 ` Matthew Brost 0 siblings, 0 replies; 4+ messages in thread From: Matthew Brost @ 2026-06-24 2:55 UTC (permalink / raw) To: Jonathan Cavitt Cc: dri-devel, saurabhg.gupta, alex.zuo, mripard, airlied, simona, linux-kernel, intel-xe, Rodrigo.vivi, maarten.lankhorst, thomas.hellstrom, tzimmermann On Wed, Jun 24, 2026 at 12:46:09AM +0800, Jonathan Cavitt wrote: > Add SRCID information to pagefault struct for the purpose of reporting > the hardware unit that resulted in the pagefault. > > v2: > - Squash SRCID with ASID to keep the struct compact (Matthew) > > v3: > - Use BUILD_BUG_ON and move ASID definition in one function (Matthew) > > Signed-off-by: Jonathan Cavitt <jonathan.cavitt@intel.com> > Cc: Matthew Brost <matthew.brost@intel.com> Reviewed-by: Matthew Brost <matthew.brost@intel.com> > Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> > Cc: David Airlie <airlied@gmail.com> > Cc: Simona Vetter <simona@ffwll.ch> > --- > drivers/gpu/drm/xe/xe_guc_pagefault.c | 8 +++++++- > drivers/gpu/drm/xe/xe_pagefault.c | 23 +++++++++++++++-------- > drivers/gpu/drm/xe/xe_pagefault_types.h | 9 +++++++-- > 3 files changed, 29 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.c b/drivers/gpu/drm/xe/xe_guc_pagefault.c > index 607e32392f46..f36aa17b5b3e 100644 > --- a/drivers/gpu/drm/xe/xe_guc_pagefault.c > +++ b/drivers/gpu/drm/xe/xe_guc_pagefault.c > @@ -78,7 +78,13 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) > << PFD_VIRTUAL_ADDR_HI_SHIFT) | > (FIELD_GET(PFD_VIRTUAL_ADDR_LO, msg[2]) << > PFD_VIRTUAL_ADDR_LO_SHIFT); > - pf.consumer.asid = FIELD_GET(PFD_ASID, msg[1]); > + > + BUILD_BUG_ON(XE_MAX_ASID > XE_PAGEFAULT_ASID_MASK); > + > + pf.consumer.id = FIELD_PREP(XE_PAGEFAULT_ASID_MASK, > + 
FIELD_GET(PFD_ASID, msg[1])) | > + FIELD_PREP(XE_PAGEFAULT_SRCID_MASK, > + FIELD_GET(PFD_SRC_ID, msg[0])); > pf.consumer.access_type = FIELD_GET(PFD_ACCESS_TYPE, msg[2]) | > (FIELD_GET(PFD_PREFETCH, msg[2]) ? XE_PAGEFAULT_ACCESS_PREFETCH : 0); > if (FIELD_GET(XE2_PFD_TRVA_FAULT, msg[0])) > diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c > index dd3c068e1a39..3b0510f2a72e 100644 > --- a/drivers/gpu/drm/xe/xe_pagefault.c > +++ b/drivers/gpu/drm/xe/xe_pagefault.c > @@ -175,12 +175,13 @@ static int xe_pagefault_service(struct xe_pagefault *pf) > struct xe_vma *vma = NULL; > int err; > bool atomic; > + u32 asid = FIELD_GET(XE_PAGEFAULT_ASID_MASK, pf->consumer.id); > > /* Producer flagged this fault to be nacked */ > if (pf->consumer.fault_type_level == XE_PAGEFAULT_TYPE_LEVEL_NACK) > return -EFAULT; > > - vm = xe_pagefault_asid_to_vm(xe, pf->consumer.asid); > + vm = xe_pagefault_asid_to_vm(xe, asid); > if (IS_ERR(vm)) > return PTR_ERR(vm); > > @@ -242,14 +243,16 @@ static bool xe_pagefault_queue_pop(struct xe_pagefault_queue *pf_queue, > > static void xe_pagefault_print(struct xe_pagefault *pf) > { > - xe_gt_info(pf->gt, "\n\tASID: %d\n" > + xe_gt_info(pf->gt, "\n\tASID: %ld\n" > "\tFaulted Address: 0x%08x%08x\n" > "\tFaultType: %lu\n" > "\tAccessType: %lu\n" > "\tFaultLevel: %lu\n" > "\tEngineClass: %d %s\n" > - "\tEngineInstance: %d\n", > - pf->consumer.asid, > + "\tEngineInstance: %d\n" > + "\tSRCID: 0x%02lx\n", > + FIELD_GET(XE_PAGEFAULT_ASID_MASK, > + pf->consumer.id), > upper_32_bits(pf->consumer.page_addr), > lower_32_bits(pf->consumer.page_addr), > FIELD_GET(XE_PAGEFAULT_TYPE_MASK, > @@ -260,7 +263,9 @@ static void xe_pagefault_print(struct xe_pagefault *pf) > pf->consumer.fault_type_level), > pf->consumer.engine_class, > xe_hw_engine_class_to_str(pf->consumer.engine_class), > - pf->consumer.engine_instance); > + pf->consumer.engine_instance, > + FIELD_GET(XE_PAGEFAULT_SRCID_MASK, > + pf->consumer.id)); > } > > static void 
xe_pagefault_save_to_vm(struct xe_device *xe, struct xe_pagefault *pf) > @@ -273,7 +278,8 @@ static void xe_pagefault_save_to_vm(struct xe_device *xe, struct xe_pagefault *p > * mode, return VM anyways. > */ > down_read(&xe->usm.lock); > - vm = xa_load(&xe->usm.asid_to_vm, pf->consumer.asid); > + vm = xa_load(&xe->usm.asid_to_vm, > + FIELD_GET(XE_PAGEFAULT_ASID_MASK, pf->consumer.id)); > if (vm) > xe_vm_get(vm); > else > @@ -474,8 +480,9 @@ static bool xe_pagefault_queue_full(struct xe_pagefault_queue *pf_queue) > */ > int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf) > { > + u32 asid = FIELD_GET(XE_PAGEFAULT_ASID_MASK, pf->consumer.id); > struct xe_pagefault_queue *pf_queue = xe->usm.pf_queue + > - (pf->consumer.asid % XE_PAGEFAULT_QUEUE_COUNT); > + (asid % XE_PAGEFAULT_QUEUE_COUNT); > unsigned long flags; > bool full; > > @@ -489,7 +496,7 @@ int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf) > } else { > drm_warn(&xe->drm, > "PageFault Queue (%d) full, shouldn't be possible\n", > - pf->consumer.asid % XE_PAGEFAULT_QUEUE_COUNT); > + asid % XE_PAGEFAULT_QUEUE_COUNT); > } > spin_unlock_irqrestore(&pf_queue->lock, flags); > > diff --git a/drivers/gpu/drm/xe/xe_pagefault_types.h b/drivers/gpu/drm/xe/xe_pagefault_types.h > index c4ee625b93dd..2213cea886c7 100644 > --- a/drivers/gpu/drm/xe/xe_pagefault_types.h > +++ b/drivers/gpu/drm/xe/xe_pagefault_types.h > @@ -65,8 +65,13 @@ struct xe_pagefault { > struct { > /** @consumer.page_addr: address of page fault */ > u64 page_addr; > - /** @consumer.asid: address space ID */ > - u32 asid; > +#define XE_PAGEFAULT_ASID_MASK GENMASK(23, 0) > +#define XE_PAGEFAULT_SRCID_MASK GENMASK(31, 24) > + /** > + * @consumer.id: address space ID and SRCID, folded into one > + * to keep size compact > + */ > + u32 id; > /** > * @consumer.access_type: access type and prefetch flag packed > * into a u8. > -- > 2.53.0 > ^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v5 2/2] drm/xe/vm: Add srcid to xe_vm_get_property_ioctl fault report 2026-06-23 16:46 [PATCH v5 0/2] drm/xe/pagefault: Add SRCID to pagefault reporting Jonathan Cavitt 2026-06-23 16:46 ` [PATCH v5 1/2] drm/xe/pagefault: Add SRCID to pagefault struct Jonathan Cavitt @ 2026-06-23 16:46 ` Jonathan Cavitt 1 sibling, 0 replies; 4+ messages in thread From: Jonathan Cavitt @ 2026-06-23 16:46 UTC (permalink / raw) To: dri-devel Cc: saurabhg.gupta, alex.zuo, jonathan.cavitt, mripard, airlied, simona, linux-kernel, intel-xe, Rodrigo.vivi, matthew.brost, maarten.lankhorst, thomas.hellstrom, tzimmermann Add the SRCID of the faulting hardware unit to the return of the xe_vm_get_property_ioctl fault report. v2: - Readd pad check, as the pad in the ioctl struct was not changed (jcavitt) v3: - Squash SRCID with ASID to keep the struct compact (Matthew) Signed-off-by: Jonathan Cavitt <jonathan.cavitt@intel.com> Reviewed-by: Matthew Brost <matthew.brost@intel.com> Cc: Matthew Brost <matthew.brost@intel.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: David Airlie <airlied@gmail.com> Cc: Simona Vetter <simona@ffwll.ch> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: Maxime Ripard <mripard@kernel.org> Cc: Thomas Zimmermann <tzimmermann@suse.de> --- drivers/gpu/drm/xe/xe_vm.c | 8 ++++++++ drivers/gpu/drm/xe/xe_vm_types.h | 2 ++ include/uapi/drm/xe_drm.h | 4 ++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 080c2fff0e95..acbfce19b356 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -636,6 +636,7 @@ void xe_vm_add_fault_entry_pf(struct xe_vm *vm, struct xe_pagefault *pf) pf->consumer.fault_type_level); e->fault_level = FIELD_GET(XE_PAGEFAULT_LEVEL_MASK, pf->consumer.fault_type_level); + e->srcid = FIELD_GET(XE_PAGEFAULT_SRCID_MASK, pf->consumer.id); list_add_tail(&e->list, &vm->faults.list); 
vm->faults.len++; @@ -4108,6 +4109,11 @@ static u8 xe_to_user_fault_level(u8 fault_level) return fault_level; } +static u8 xe_to_user_srcid(u8 srcid) +{ + return srcid; +} + static int fill_faults(struct xe_vm *vm, struct drm_xe_vm_get_property *args) { @@ -4135,6 +4141,8 @@ static int fill_faults(struct xe_vm *vm, fault_entry.fault_type = xe_to_user_fault_type(entry->fault_type); fault_entry.fault_level = xe_to_user_fault_level(entry->fault_level); + fault_entry.srcid = xe_to_user_srcid(entry->srcid); + memcpy(&fault_list[i], &fault_entry, entry_size); i++; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 635ed29b9a69..e8aea75341cc 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -196,6 +196,7 @@ struct xe_device; * @access_type: type of address access that resulted in fault * @fault_type: type of fault reported * @fault_level: fault level of the fault + * @srcid: ID of the faulting hardware unit */ struct xe_vm_fault_entry { struct list_head list; @@ -204,6 +205,7 @@ struct xe_vm_fault_entry { u8 access_type; u8 fault_type; u8 fault_level; + u8 srcid; }; struct xe_vm { diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 50c80af4ad4e..ee4cfd6b2246 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1294,8 +1294,8 @@ struct xe_vm_fault { #define FAULT_LEVEL_PML4 3 #define FAULT_LEVEL_PML5 4 __u8 fault_level; - /** @pad: MBZ */ - __u8 pad; + /** @srcid: ID of the faulting hardware unit */ + __u8 srcid; /** @reserved: MBZ */ __u64 reserved[4]; }; -- 2.53.0 ^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2026-06-24 2:55 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2026-06-23 16:46 [PATCH v5 0/2] drm/xe/pagefault: Add SRCID to pagefault reporting Jonathan Cavitt 2026-06-23 16:46 ` [PATCH v5 1/2] drm/xe/pagefault: Add SRCID to pagefault struct Jonathan Cavitt 2026-06-24 2:55 ` Matthew Brost 2026-06-23 16:46 ` [PATCH v5 2/2] drm/xe/vm: Add srcid to xe_vm_get_property_ioctl fault report Jonathan Cavitt
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.