* [PATCH v6 0/3] drm/xe/oa: Wa_14026633728
@ 2026-04-27 19:02 Ashutosh Dixit
2026-04-27 19:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
` (2 more replies)
0 siblings, 3 replies; 16+ messages in thread
From: Ashutosh Dixit @ 2026-04-27 19:02 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
v6 through v2: Change to Patch 1 (see changelog in Patch 1)
Ashutosh Dixit (3):
drm/xe/oa: Use xe_map layer
drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap
drm/xe/oa: Implement Wa_14026633728
drivers/gpu/drm/xe/xe_oa.c | 123 ++++++++++++++++-------------
drivers/gpu/drm/xe/xe_oa_types.h | 4 +-
drivers/gpu/drm/xe/xe_wa_oob.rules | 1 +
3 files changed, 70 insertions(+), 58 deletions(-)
--
2.54.0
^ permalink raw reply [flat|nested] 16+ messages in thread* [PATCH 1/3] drm/xe/oa: Use xe_map layer 2026-04-27 19:02 [PATCH v6 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit @ 2026-04-27 19:02 ` Ashutosh Dixit 2026-04-27 19:34 ` Umesh Nerlige Ramappa 2026-04-27 19:02 ` [PATCH 2/3] drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap Ashutosh Dixit 2026-04-27 19:02 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 2 siblings, 1 reply; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-27 19:02 UTC (permalink / raw) To: intel-xe; +Cc: Umesh Nerlige Ramappa OA code should have used xe_map layer to begin with. In CRI, the OA buffer can be located both in system and device memory. For these reasons, move OA code to use the xe_map layer when accessing the OA buffer. v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh) v3: To avoid performance impact in v2, revert to v1 but move xe_map_copy_to_user() to xe_map.h v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user v5: Fix offsets in oa_timestamp() and oa_timestamp_clear() (Umesh) v6: Rename head/tail in helper args to report_offset (Umesh) Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> --- drivers/gpu/drm/xe/xe_oa.c | 95 +++++++++++++++++++------------- drivers/gpu/drm/xe/xe_oa_types.h | 4 +- 2 files changed, 58 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 6337e671c97ae..1fff0e8e9e78e 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) #define oa_report_header_64bit(__s) \ ((__s)->oa_buffer.format->header == HDR_64_BIT) -static u64 oa_report_id(struct xe_oa_stream *stream, void *report) +static u64 oa_report_id(struct xe_oa_stream *stream, u32 report_offset) { - return oa_report_header_64bit(stream) ? 
*(u64 *)report : *(u32 *)report; + struct iosys_map *map = &stream->oa_buffer.bo->vmap; + + return oa_report_header_64bit(stream) ? + xe_map_rd(stream->oa->xe, map, report_offset, u64) : + xe_map_rd(stream->oa->xe, map, report_offset, u32); } -static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report) +static void oa_report_id_clear(struct xe_oa_stream *stream, u32 report_offset) { - if (oa_report_header_64bit(stream)) - *(u64 *)report = 0; - else - *report = 0; + struct iosys_map *map = &stream->oa_buffer.bo->vmap; + + oa_report_header_64bit(stream) ? + xe_map_wr(stream->oa->xe, map, report_offset, u64, 0) : + xe_map_wr(stream->oa->xe, map, report_offset, u32, 0); } -static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) +static u64 oa_timestamp(struct xe_oa_stream *stream, u32 report_offset) { + struct iosys_map *map = &stream->oa_buffer.bo->vmap; + return oa_report_header_64bit(stream) ? - *((u64 *)report + 1) : - *((u32 *)report + 1); + xe_map_rd(stream->oa->xe, map, report_offset + 8, u64) : + xe_map_rd(stream->oa->xe, map, report_offset + 4, u32); } -static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) +static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset) { - if (oa_report_header_64bit(stream)) - *(u64 *)&report[2] = 0; - else - report[1] = 0; + struct iosys_map *map = &stream->oa_buffer.bo->vmap; + + oa_report_header_64bit(stream) ? + xe_map_wr(stream->oa->xe, map, report_offset + 8, u64, 0) : + xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0); } static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) @@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) * they were written. 
If not : (╯°□°)╯︵ ┻━┻ */ while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) { - void *report = stream->oa_buffer.vaddr + tail; - - if (oa_report_id(stream, report) || oa_timestamp(stream, report)) + if (oa_report_id(stream, tail) || oa_timestamp(stream, tail)) break; tail = xe_oa_circ_diff(stream, tail, report_size); @@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) return HRTIMER_RESTART; } +static unsigned long +xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 report_offset, u32 len) +{ + xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce, + &stream->oa_buffer.bo->vmap, report_offset, len); + return copy_to_user(dst, stream->oa_buffer.bounce, len); +} + static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, - size_t count, size_t *offset, const u8 *report) + size_t count, size_t *offset, u32 report_offset) { int report_size = stream->oa_buffer.format->size; int report_size_partial; - u8 *oa_buf_end; if ((count - *offset) < report_size) return -ENOSPC; buf += *offset; - oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; - report_size_partial = oa_buf_end - report; + report_size_partial = stream->oa_buffer.circ_size - report_offset; if (report_size_partial < report_size) { - if (copy_to_user(buf, report, report_size_partial)) + if (xe_oa_copy_to_user(stream, buf, report_offset, report_size_partial)) return -EFAULT; buf += report_size_partial; - if (copy_to_user(buf, stream->oa_buffer.vaddr, - report_size - report_size_partial)) + if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial)) return -EFAULT; - } else if (copy_to_user(buf, report, report_size)) { + } else if (xe_oa_copy_to_user(stream, buf, report_offset, report_size)) { return -EFAULT; } @@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, size_t count, size_t *offset) { int report_size = 
stream->oa_buffer.format->size; - u8 *oa_buf_base = stream->oa_buffer.vaddr; u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); size_t start_offset = *offset; unsigned long flags; @@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, for (; xe_oa_circ_diff(stream, tail, head); head = xe_oa_circ_incr(stream, head, report_size)) { - u8 *report = oa_buf_base + head; - - ret = xe_oa_append_report(stream, buf, count, offset, report); + ret = xe_oa_append_report(stream, buf, count, offset, head); if (ret) break; if (!(stream->oa_buffer.circ_size % report_size)) { /* Clear out report id and timestamp to detect unlanded reports */ - oa_report_id_clear(stream, (void *)report); - oa_timestamp_clear(stream, (void *)report); + oa_report_id_clear(stream, head); + oa_timestamp_clear(stream, head); } else { - u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; - u32 part = oa_buf_end - report; + struct iosys_map *map = &stream->oa_buffer.bo->vmap; + u32 part = stream->oa_buffer.circ_size - head; /* Zero out the entire report */ if (report_size <= part) { - memset(report, 0, report_size); + xe_map_memset(stream->oa->xe, map, head, 0, report_size); } else { - memset(report, 0, part); - memset(oa_buf_base, 0, report_size - part); + xe_map_memset(stream->oa->xe, map, head, 0, part); + xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part); } } } @@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ - memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo)); + xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0, + xe_bo_size(stream->oa_buffer.bo)); } static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) @@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs) static void 
xe_oa_free_oa_buffer(struct xe_oa_stream *stream) { xe_bo_unpin_map_no_vm(stream->oa_buffer.bo); + kfree(stream->oa_buffer.bounce); } static void xe_oa_free_configs(struct xe_oa_stream *stream) @@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) return PTR_ERR(bo); stream->oa_buffer.bo = bo; + /* mmap implementation requires OA buffer to be in system memory */ xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0); - stream->oa_buffer.vaddr = bo->vmap.vaddr; + + stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL); + if (!stream->oa_buffer.bounce) { + xe_bo_unpin_map_no_vm(stream->oa_buffer.bo); + return -ENOMEM; + } + return 0; } diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 8906c3084b5f8..3d9ec8490899c 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -164,8 +164,8 @@ struct xe_oa_buffer { /** @bo: xe_bo backing the OA buffer */ struct xe_bo *bo; - /** @vaddr: mapped vaddr of the OA buffer */ - u8 *vaddr; + /** @bounce: bounce buffer used with xe_map layer */ + void *bounce; /** @ptr_lock: Lock protecting reads/writes to head/tail pointers */ spinlock_t ptr_lock; -- 2.54.0 ^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer 2026-04-27 19:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit @ 2026-04-27 19:34 ` Umesh Nerlige Ramappa 2026-04-27 20:30 ` Dixit, Ashutosh 0 siblings, 1 reply; 16+ messages in thread From: Umesh Nerlige Ramappa @ 2026-04-27 19:34 UTC (permalink / raw) To: Ashutosh Dixit; +Cc: intel-xe On Mon, Apr 27, 2026 at 12:02:21PM -0700, Ashutosh Dixit wrote: >OA code should have used xe_map layer to begin with. In CRI, the OA buffer >can be located both in system and device memory. For these reasons, move OA >code to use the xe_map layer when accessing the OA buffer. > >v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh) >v3: To avoid performance impact in v2, revert to v1 but move > xe_map_copy_to_user() to xe_map.h >v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user >v5: Fix offsets in oa_timestamp() and oa_timestamp_clear() (Umesh) >v6: Rename head/tail in helper args to report_offset (Umesh) Thanks for renaming. My comments are in the v5 version of the series. With an assert/check in the xe_oa_copy_to_user(), this should be Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Umesh > >Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> >--- > drivers/gpu/drm/xe/xe_oa.c | 95 +++++++++++++++++++------------- > drivers/gpu/drm/xe/xe_oa_types.h | 4 +- > 2 files changed, 58 insertions(+), 41 deletions(-) > >diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c >index 6337e671c97ae..1fff0e8e9e78e 100644 >--- a/drivers/gpu/drm/xe/xe_oa.c >+++ b/drivers/gpu/drm/xe/xe_oa.c >@@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) > #define oa_report_header_64bit(__s) \ > ((__s)->oa_buffer.format->header == HDR_64_BIT) > >-static u64 oa_report_id(struct xe_oa_stream *stream, void *report) >+static u64 oa_report_id(struct xe_oa_stream *stream, u32 report_offset) > { >- return oa_report_header_64bit(stream) ? 
*(u64 *)report : *(u32 *)report; >+ struct iosys_map *map = &stream->oa_buffer.bo->vmap; >+ >+ return oa_report_header_64bit(stream) ? >+ xe_map_rd(stream->oa->xe, map, report_offset, u64) : >+ xe_map_rd(stream->oa->xe, map, report_offset, u32); > } > >-static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report) >+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 report_offset) > { >- if (oa_report_header_64bit(stream)) >- *(u64 *)report = 0; >- else >- *report = 0; >+ struct iosys_map *map = &stream->oa_buffer.bo->vmap; >+ >+ oa_report_header_64bit(stream) ? >+ xe_map_wr(stream->oa->xe, map, report_offset, u64, 0) : >+ xe_map_wr(stream->oa->xe, map, report_offset, u32, 0); > } > >-static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) >+static u64 oa_timestamp(struct xe_oa_stream *stream, u32 report_offset) > { >+ struct iosys_map *map = &stream->oa_buffer.bo->vmap; >+ > return oa_report_header_64bit(stream) ? >- *((u64 *)report + 1) : >- *((u32 *)report + 1); >+ xe_map_rd(stream->oa->xe, map, report_offset + 8, u64) : >+ xe_map_rd(stream->oa->xe, map, report_offset + 4, u32); > } > >-static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) >+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset) > { >- if (oa_report_header_64bit(stream)) >- *(u64 *)&report[2] = 0; >- else >- report[1] = 0; >+ struct iosys_map *map = &stream->oa_buffer.bo->vmap; >+ >+ oa_report_header_64bit(stream) ? >+ xe_map_wr(stream->oa->xe, map, report_offset + 8, u64, 0) : >+ xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0); > } > > static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) >@@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) > * they were written. 
If not : (╯°□°)╯︵ ┻━┻ > */ > while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) { >- void *report = stream->oa_buffer.vaddr + tail; >- >- if (oa_report_id(stream, report) || oa_timestamp(stream, report)) >+ if (oa_report_id(stream, tail) || oa_timestamp(stream, tail)) > break; > > tail = xe_oa_circ_diff(stream, tail, report_size); >@@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) > return HRTIMER_RESTART; > } > >+static unsigned long >+xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 report_offset, u32 len) >+{ >+ xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce, >+ &stream->oa_buffer.bo->vmap, report_offset, len); >+ return copy_to_user(dst, stream->oa_buffer.bounce, len); >+} >+ > static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, >- size_t count, size_t *offset, const u8 *report) >+ size_t count, size_t *offset, u32 report_offset) > { > int report_size = stream->oa_buffer.format->size; > int report_size_partial; >- u8 *oa_buf_end; > > if ((count - *offset) < report_size) > return -ENOSPC; > > buf += *offset; > >- oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; >- report_size_partial = oa_buf_end - report; >+ report_size_partial = stream->oa_buffer.circ_size - report_offset; > > if (report_size_partial < report_size) { >- if (copy_to_user(buf, report, report_size_partial)) >+ if (xe_oa_copy_to_user(stream, buf, report_offset, report_size_partial)) > return -EFAULT; > buf += report_size_partial; > >- if (copy_to_user(buf, stream->oa_buffer.vaddr, >- report_size - report_size_partial)) >+ if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial)) > return -EFAULT; >- } else if (copy_to_user(buf, report, report_size)) { >+ } else if (xe_oa_copy_to_user(stream, buf, report_offset, report_size)) { > return -EFAULT; > } > >@@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream 
*stream, char __user *buf, > size_t count, size_t *offset) > { > int report_size = stream->oa_buffer.format->size; >- u8 *oa_buf_base = stream->oa_buffer.vaddr; > u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); > size_t start_offset = *offset; > unsigned long flags; >@@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, > > for (; xe_oa_circ_diff(stream, tail, head); > head = xe_oa_circ_incr(stream, head, report_size)) { >- u8 *report = oa_buf_base + head; >- >- ret = xe_oa_append_report(stream, buf, count, offset, report); >+ ret = xe_oa_append_report(stream, buf, count, offset, head); > if (ret) > break; > > if (!(stream->oa_buffer.circ_size % report_size)) { > /* Clear out report id and timestamp to detect unlanded reports */ >- oa_report_id_clear(stream, (void *)report); >- oa_timestamp_clear(stream, (void *)report); >+ oa_report_id_clear(stream, head); >+ oa_timestamp_clear(stream, head); > } else { >- u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; >- u32 part = oa_buf_end - report; >+ struct iosys_map *map = &stream->oa_buffer.bo->vmap; >+ u32 part = stream->oa_buffer.circ_size - head; > > /* Zero out the entire report */ > if (report_size <= part) { >- memset(report, 0, report_size); >+ xe_map_memset(stream->oa->xe, map, head, 0, report_size); > } else { >- memset(report, 0, part); >- memset(oa_buf_base, 0, report_size - part); >+ xe_map_memset(stream->oa->xe, map, head, 0, part); >+ xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part); > } > } > } >@@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); > > /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ >- memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo)); >+ xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0, >+ xe_bo_size(stream->oa_buffer.bo)); > } > > static u32 
__format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) >@@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs) > static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream) > { > xe_bo_unpin_map_no_vm(stream->oa_buffer.bo); >+ kfree(stream->oa_buffer.bounce); > } > > static void xe_oa_free_configs(struct xe_oa_stream *stream) >@@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) > return PTR_ERR(bo); > > stream->oa_buffer.bo = bo; >+ > /* mmap implementation requires OA buffer to be in system memory */ > xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0); >- stream->oa_buffer.vaddr = bo->vmap.vaddr; >+ >+ stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL); >+ if (!stream->oa_buffer.bounce) { >+ xe_bo_unpin_map_no_vm(stream->oa_buffer.bo); >+ return -ENOMEM; >+ } >+ > return 0; > } > >diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h >index 8906c3084b5f8..3d9ec8490899c 100644 >--- a/drivers/gpu/drm/xe/xe_oa_types.h >+++ b/drivers/gpu/drm/xe/xe_oa_types.h >@@ -164,8 +164,8 @@ struct xe_oa_buffer { > /** @bo: xe_bo backing the OA buffer */ > struct xe_bo *bo; > >- /** @vaddr: mapped vaddr of the OA buffer */ >- u8 *vaddr; >+ /** @bounce: bounce buffer used with xe_map layer */ >+ void *bounce; > > /** @ptr_lock: Lock protecting reads/writes to head/tail pointers */ > spinlock_t ptr_lock; >-- >2.54.0 > ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer 2026-04-27 19:34 ` Umesh Nerlige Ramappa @ 2026-04-27 20:30 ` Dixit, Ashutosh 0 siblings, 0 replies; 16+ messages in thread From: Dixit, Ashutosh @ 2026-04-27 20:30 UTC (permalink / raw) To: Umesh Nerlige Ramappa; +Cc: intel-xe On Mon, 27 Apr 2026 12:34:24 -0700, Umesh Nerlige Ramappa wrote: > > On Mon, Apr 27, 2026 at 12:02:21PM -0700, Ashutosh Dixit wrote: > > OA code should have used xe_map layer to begin with. In CRI, the OA buffer > > can be located both in system and device memory. For these reasons, move OA > > code to use the xe_map layer when accessing the OA buffer. > > > > v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh) > > v3: To avoid performance impact in v2, revert to v1 but move > > xe_map_copy_to_user() to xe_map.h > > v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user > > v5: Fix offsets in oa_timestamp() and oa_timestamp_clear() (Umesh) > > v6: Rename head/tail in helper args to report_offset (Umesh) > > Thanks for renaming. My comments are in the v5 version of the series. > > With an assert/check in the xe_oa_copy_to_user(), this should be Add in v7. The only remaining comment is the one about the test, will make a note of it. > > Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Thanks. 
-- Ashutosh > > > > > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> > > --- > > drivers/gpu/drm/xe/xe_oa.c | 95 +++++++++++++++++++------------- > > drivers/gpu/drm/xe/xe_oa_types.h | 4 +- > > 2 files changed, 58 insertions(+), 41 deletions(-) > > > > diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c > > index 6337e671c97ae..1fff0e8e9e78e 100644 > > --- a/drivers/gpu/drm/xe/xe_oa.c > > +++ b/drivers/gpu/drm/xe/xe_oa.c > > @@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) > > #define oa_report_header_64bit(__s) \ > > ((__s)->oa_buffer.format->header == HDR_64_BIT) > > > > -static u64 oa_report_id(struct xe_oa_stream *stream, void *report) > > +static u64 oa_report_id(struct xe_oa_stream *stream, u32 report_offset) > > { > > - return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap; > > + > > + return oa_report_header_64bit(stream) ? > > + xe_map_rd(stream->oa->xe, map, report_offset, u64) : > > + xe_map_rd(stream->oa->xe, map, report_offset, u32); > > } > > > > -static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report) > > +static void oa_report_id_clear(struct xe_oa_stream *stream, u32 report_offset) > > { > > - if (oa_report_header_64bit(stream)) > > - *(u64 *)report = 0; > > - else > > - *report = 0; > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap; > > + > > + oa_report_header_64bit(stream) ? > > + xe_map_wr(stream->oa->xe, map, report_offset, u64, 0) : > > + xe_map_wr(stream->oa->xe, map, report_offset, u32, 0); > > } > > > > -static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) > > +static u64 oa_timestamp(struct xe_oa_stream *stream, u32 report_offset) > > { > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap; > > + > > return oa_report_header_64bit(stream) ? 
> > - *((u64 *)report + 1) : > > - *((u32 *)report + 1); > > + xe_map_rd(stream->oa->xe, map, report_offset + 8, u64) : > > + xe_map_rd(stream->oa->xe, map, report_offset + 4, u32); > > } > > > > -static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) > > +static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset) > > { > > - if (oa_report_header_64bit(stream)) > > - *(u64 *)&report[2] = 0; > > - else > > - report[1] = 0; > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap; > > + > > + oa_report_header_64bit(stream) ? > > + xe_map_wr(stream->oa->xe, map, report_offset + 8, u64, 0) : > > + xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0); > > } > > > > static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) > > @@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) > > * they were written. If not : (╯°□°)╯︵ ┻━┻ > > */ > > while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) { > > - void *report = stream->oa_buffer.vaddr + tail; > > - > > - if (oa_report_id(stream, report) || oa_timestamp(stream, report)) > > + if (oa_report_id(stream, tail) || oa_timestamp(stream, tail)) > > break; > > > > tail = xe_oa_circ_diff(stream, tail, report_size); > > @@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) > > return HRTIMER_RESTART; > > } > > > > +static unsigned long > > +xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 report_offset, u32 len) > > +{ > > + xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce, > > + &stream->oa_buffer.bo->vmap, report_offset, len); > > + return copy_to_user(dst, stream->oa_buffer.bounce, len); > > +} > > + > > static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, > > - size_t count, size_t *offset, const u8 *report) > > + size_t count, size_t *offset, u32 report_offset) > > { > > int report_size = 
stream->oa_buffer.format->size; > > int report_size_partial; > > - u8 *oa_buf_end; > > > > if ((count - *offset) < report_size) > > return -ENOSPC; > > > > buf += *offset; > > > > - oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; > > - report_size_partial = oa_buf_end - report; > > + report_size_partial = stream->oa_buffer.circ_size - report_offset; > > > > if (report_size_partial < report_size) { > > - if (copy_to_user(buf, report, report_size_partial)) > > + if (xe_oa_copy_to_user(stream, buf, report_offset, report_size_partial)) > > return -EFAULT; > > buf += report_size_partial; > > > > - if (copy_to_user(buf, stream->oa_buffer.vaddr, > > - report_size - report_size_partial)) > > + if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial)) > > return -EFAULT; > > - } else if (copy_to_user(buf, report, report_size)) { > > + } else if (xe_oa_copy_to_user(stream, buf, report_offset, report_size)) { > > return -EFAULT; > > } > > > > @@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, > > size_t count, size_t *offset) > > { > > int report_size = stream->oa_buffer.format->size; > > - u8 *oa_buf_base = stream->oa_buffer.vaddr; > > u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); > > size_t start_offset = *offset; > > unsigned long flags; > > @@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, > > > > for (; xe_oa_circ_diff(stream, tail, head); > > head = xe_oa_circ_incr(stream, head, report_size)) { > > - u8 *report = oa_buf_base + head; > > - > > - ret = xe_oa_append_report(stream, buf, count, offset, report); > > + ret = xe_oa_append_report(stream, buf, count, offset, head); > > if (ret) > > break; > > > > if (!(stream->oa_buffer.circ_size % report_size)) { > > /* Clear out report id and timestamp to detect unlanded reports */ > > - oa_report_id_clear(stream, (void *)report); > > - oa_timestamp_clear(stream, (void *)report); > > 
+ oa_report_id_clear(stream, head); > > + oa_timestamp_clear(stream, head); > > } else { > > - u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; > > - u32 part = oa_buf_end - report; > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap; > > + u32 part = stream->oa_buffer.circ_size - head; > > > > /* Zero out the entire report */ > > if (report_size <= part) { > > - memset(report, 0, report_size); > > + xe_map_memset(stream->oa->xe, map, head, 0, report_size); > > } else { > > - memset(report, 0, part); > > - memset(oa_buf_base, 0, report_size - part); > > + xe_map_memset(stream->oa->xe, map, head, 0, part); > > + xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part); > > } > > } > > } > > @@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) > > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); > > > > /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ > > - memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo)); > > + xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0, > > + xe_bo_size(stream->oa_buffer.bo)); > > } > > > > static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) > > @@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs) > > static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream) > > { > > xe_bo_unpin_map_no_vm(stream->oa_buffer.bo); > > + kfree(stream->oa_buffer.bounce); > > } > > > > static void xe_oa_free_configs(struct xe_oa_stream *stream) > > @@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) > > return PTR_ERR(bo); > > > > stream->oa_buffer.bo = bo; > > + > > /* mmap implementation requires OA buffer to be in system memory */ > > xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0); > > - stream->oa_buffer.vaddr = bo->vmap.vaddr; > > + > > + stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL); > > + if 
(!stream->oa_buffer.bounce) { > > + xe_bo_unpin_map_no_vm(stream->oa_buffer.bo); > > + return -ENOMEM; > > + } > > + > > return 0; > > } > > > > diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h > > index 8906c3084b5f8..3d9ec8490899c 100644 > > --- a/drivers/gpu/drm/xe/xe_oa_types.h > > +++ b/drivers/gpu/drm/xe/xe_oa_types.h > > @@ -164,8 +164,8 @@ struct xe_oa_buffer { > > /** @bo: xe_bo backing the OA buffer */ > > struct xe_bo *bo; > > > > - /** @vaddr: mapped vaddr of the OA buffer */ > > - u8 *vaddr; > > + /** @bounce: bounce buffer used with xe_map layer */ > > + void *bounce; > > > > /** @ptr_lock: Lock protecting reads/writes to head/tail pointers */ > > spinlock_t ptr_lock; > > -- > > 2.54.0 > > ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 2/3] drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap 2026-04-27 19:02 [PATCH v6 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit 2026-04-27 19:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit @ 2026-04-27 19:02 ` Ashutosh Dixit 2026-04-27 19:02 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 2 siblings, 0 replies; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-27 19:02 UTC (permalink / raw) To: intel-xe; +Cc: Umesh Nerlige Ramappa OA buffer mmap can currently only mmap OA buffer in system memory. CRI MERTOA buffer can be located in device memory. Switch OA buffer mmap to using drm_gem_mmap_obj, which can handle mmap's of both system and device memory buffers. Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/xe/xe_oa.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 1fff0e8e9e78e..d3994ae8bc82d 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -9,6 +9,7 @@ #include <linux/poll.h> #include <drm/drm_drv.h> +#include <drm/drm_gem.h> #include <drm/drm_managed.h> #include <drm/drm_syncobj.h> #include <uapi/drm/xe_drm.h> @@ -900,9 +901,6 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) stream->oa_buffer.bo = bo; - /* mmap implementation requires OA buffer to be in system memory */ - xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0); - stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL); if (!stream->oa_buffer.bounce) { xe_bo_unpin_map_no_vm(stream->oa_buffer.bo); @@ -1690,8 +1688,6 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) { struct xe_oa_stream *stream = file->private_data; struct xe_bo *bo = stream->oa_buffer.bo; - unsigned long start = vma->vm_start; - int i, ret; if (xe_observation_paranoid && !perfmon_capable()) { 
drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n"); @@ -1699,7 +1695,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) } /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ - if (vma->vm_end - vma->vm_start != xe_bo_size(stream->oa_buffer.bo)) { + if (vma->vm_end - vma->vm_start != xe_bo_size(bo)) { drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); return -EINVAL; } @@ -1715,17 +1711,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY, VM_MAYWRITE | VM_MAYEXEC); - xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma)); - for (i = 0; i < bo->ttm.ttm->num_pages; i++) { - ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]), - PAGE_SIZE, vma->vm_page_prot); - if (ret) - break; - - start += PAGE_SIZE; - } - - return ret; + return drm_gem_mmap_obj(&bo->ttm.base, xe_bo_size(bo), vma); } static const struct file_operations xe_oa_fops = { -- 2.54.0 ^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 2026-04-27 19:02 [PATCH v6 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit 2026-04-27 19:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit 2026-04-27 19:02 ` [PATCH 2/3] drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap Ashutosh Dixit @ 2026-04-27 19:02 ` Ashutosh Dixit 2 siblings, 0 replies; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-27 19:02 UTC (permalink / raw) To: intel-xe; +Cc: Umesh Nerlige Ramappa CRI Wa_14026633728 requires MERTOA buffer to be allocated in device memory, not system memory (which is the default for OA buffers). Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/xe/xe_oa.c | 10 +++++++++- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index d3994ae8bc82d..b988b02c661c7 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -250,6 +250,11 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset) xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0); } +static bool mert_wa_14026633728(struct xe_oa_stream *s) +{ + return s->oa_unit->type == DRM_XE_OA_UNIT_TYPE_MERT && XE_GT_WA(s->gt, 14026633728); +} + static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -891,11 +896,14 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) { + u32 vram = mert_wa_14026633728(stream) ? 
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(stream->oa->xe)) : + XE_BO_FLAG_SYSTEM; struct xe_bo *bo; bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile, size, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false); + vram | XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index f8a185103b805..a7c1bd9bcb943 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -65,3 +65,4 @@ 14025883347 MEDIA_VERSION_RANGE(1301, 3503) GRAPHICS_VERSION_RANGE(2004, 3005) +14026633728 PLATFORM(CRESCENTISLAND) -- 2.54.0 ^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v8 0/3] drm/xe/oa: Wa_14026633728 @ 2026-04-27 22:11 Ashutosh Dixit 2026-04-27 22:11 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 0 siblings, 1 reply; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-27 22:11 UTC (permalink / raw) To: intel-xe v7 though v2: Change to Patch 1 (see changelog in Patch 1) v8: Change to Patch 3 (see changelog in Patch 3) Ashutosh Dixit (3): drm/xe/oa: Use xe_map layer drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap drm/xe/oa: Implement Wa_14026633728 drivers/gpu/drm/xe/xe_device_wa_oob.rules | 1 + drivers/gpu/drm/xe/xe_oa.c | 126 ++++++++++++---------- drivers/gpu/drm/xe/xe_oa_types.h | 4 +- 3 files changed, 73 insertions(+), 58 deletions(-) -- 2.54.0 ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 2026-04-27 22:11 [PATCH v8 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit @ 2026-04-27 22:11 ` Ashutosh Dixit 0 siblings, 0 replies; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-27 22:11 UTC (permalink / raw) To: intel-xe CRI Wa_14026633728 requires MERTOA buffer to be allocated in device memory, not system memory (which is the default for OA buffers). v2: Move WA to xe_device_wa_oob.rules and use XE_DEVICE_WA() (Matt Roper) Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/xe/xe_device_wa_oob.rules | 1 + drivers/gpu/drm/xe/xe_oa.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules index d129cddb6ead4..92371c4905290 100644 --- a/drivers/gpu/drm/xe/xe_device_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_device_wa_oob.rules @@ -4,3 +4,4 @@ 22019338487_display PLATFORM(LUNARLAKE) 14022085890 SUBPLATFORM(BATTLEMAGE, G21) 14026539277 PLATFORM(NOVALAKE_P), PLATFORM_STEP(A0, B0) +14026633728 PLATFORM(CRESCENTISLAND) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 83797c9479f7c..5de5bf19240a2 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -15,6 +15,7 @@ #include <uapi/drm/xe_drm.h> #include <generated/xe_wa_oob.h> +#include <generated/xe_device_wa_oob.h> #include "abi/guc_actions_slpc_abi.h" #include "instructions/xe_mi_commands.h" @@ -250,6 +251,11 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset) xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0); } +static bool mert_wa_14026633728(struct xe_oa_stream *s) +{ + return s->oa_unit->type == DRM_XE_OA_UNIT_TYPE_MERT && XE_DEVICE_WA(s->oa->xe, 14026633728); +} + static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = 
xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -893,11 +899,14 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) { + u32 vram = mert_wa_14026633728(stream) ? + XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(stream->oa->xe)) : + XE_BO_FLAG_SYSTEM; struct xe_bo *bo; bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile, size, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false); + vram | XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) return PTR_ERR(bo); -- 2.54.0 ^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v7 0/3] drm/xe/oa: Wa_14026633728 @ 2026-04-27 20:26 Ashutosh Dixit 2026-04-27 20:26 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 0 siblings, 1 reply; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-27 20:26 UTC (permalink / raw) To: intel-xe; +Cc: Umesh Nerlige Ramappa v7 though v2: Change to Patch 1 (see changelog in Patch 1) Ashutosh Dixit (3): drm/xe/oa: Use xe_map layer drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap drm/xe/oa: Implement Wa_14026633728 drivers/gpu/drm/xe/xe_oa.c | 125 ++++++++++++++++------------- drivers/gpu/drm/xe/xe_oa_types.h | 4 +- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 3 files changed, 72 insertions(+), 58 deletions(-) -- 2.54.0 ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 2026-04-27 20:26 [PATCH v7 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit @ 2026-04-27 20:26 ` Ashutosh Dixit 2026-04-27 20:52 ` Matt Roper 0 siblings, 1 reply; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-27 20:26 UTC (permalink / raw) To: intel-xe; +Cc: Umesh Nerlige Ramappa CRI Wa_14026633728 requires MERTOA buffer to be allocated in device memory, not system memory (which is the default for OA buffers). Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/xe/xe_oa.c | 10 +++++++++- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 83797c9479f7c..d905302f3f151 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -250,6 +250,11 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset) xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0); } +static bool mert_wa_14026633728(struct xe_oa_stream *s) +{ + return s->oa_unit->type == DRM_XE_OA_UNIT_TYPE_MERT && XE_GT_WA(s->gt, 14026633728); +} + static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -893,11 +898,14 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) { + u32 vram = mert_wa_14026633728(stream) ? 
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(stream->oa->xe)) : + XE_BO_FLAG_SYSTEM; struct xe_bo *bo; bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile, size, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false); + vram | XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index f8a185103b805..a7c1bd9bcb943 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -65,3 +65,4 @@ 14025883347 MEDIA_VERSION_RANGE(1301, 3503) GRAPHICS_VERSION_RANGE(2004, 3005) +14026633728 PLATFORM(CRESCENTISLAND) -- 2.54.0 ^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 2026-04-27 20:26 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit @ 2026-04-27 20:52 ` Matt Roper 2026-04-27 22:13 ` Dixit, Ashutosh 0 siblings, 1 reply; 16+ messages in thread From: Matt Roper @ 2026-04-27 20:52 UTC (permalink / raw) To: Ashutosh Dixit; +Cc: intel-xe, Umesh Nerlige Ramappa On Mon, Apr 27, 2026 at 01:26:55PM -0700, Ashutosh Dixit wrote: > CRI Wa_14026633728 requires MERTOA buffer to be allocated in device memory, > not system memory (which is the default for OA buffers). > > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> > Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> > --- > drivers/gpu/drm/xe/xe_oa.c | 10 +++++++++- > drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + > 2 files changed, 10 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c > index 83797c9479f7c..d905302f3f151 100644 > --- a/drivers/gpu/drm/xe/xe_oa.c > +++ b/drivers/gpu/drm/xe/xe_oa.c > @@ -250,6 +250,11 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset) > xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0); > } > > +static bool mert_wa_14026633728(struct xe_oa_stream *s) > +{ > + return s->oa_unit->type == DRM_XE_OA_UNIT_TYPE_MERT && XE_GT_WA(s->gt, 14026633728); > +} > + > static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) > { > u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); > @@ -893,11 +898,14 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) > > static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) > { > + u32 vram = mert_wa_14026633728(stream) ? 
> + XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(stream->oa->xe)) : > + XE_BO_FLAG_SYSTEM; > struct xe_bo *bo; > > bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile, > size, ttm_bo_type_kernel, > - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false); > + vram | XE_BO_FLAG_GGTT, false); > if (IS_ERR(bo)) > return PTR_ERR(bo); > > diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules > index f8a185103b805..a7c1bd9bcb943 100644 > --- a/drivers/gpu/drm/xe/xe_wa_oob.rules > +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules > @@ -65,3 +65,4 @@ > > 14025883347 MEDIA_VERSION_RANGE(1301, 3503) > GRAPHICS_VERSION_RANGE(2004, 3005) > +14026633728 PLATFORM(CRESCENTISLAND) Is the underlying defect here tied to the graphics IP or the platform/SoC? If graphics IP, this should key off the graphics version instead of platform). If platform, we should treat it as a device workaround rather than a GT workaround. I.e., definition in xe_device_wa_oob.rules and use XE_DEVICE_WA() at the check sites. Matt > -- > 2.54.0 > -- Matt Roper Graphics Software Engineer Linux GPU Platform Enablement Intel Corporation ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 2026-04-27 20:52 ` Matt Roper @ 2026-04-27 22:13 ` Dixit, Ashutosh 0 siblings, 0 replies; 16+ messages in thread From: Dixit, Ashutosh @ 2026-04-27 22:13 UTC (permalink / raw) To: Matt Roper; +Cc: intel-xe, Umesh Nerlige Ramappa On Mon, 27 Apr 2026 13:52:41 -0700, Matt Roper wrote: > Hi Matt, > On Mon, Apr 27, 2026 at 01:26:55PM -0700, Ashutosh Dixit wrote: > > CRI Wa_14026633728 requires MERTOA buffer to be allocated in device memory, > > not system memory (which is the default for OA buffers). > > > > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> > > Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> > > --- > > drivers/gpu/drm/xe/xe_oa.c | 10 +++++++++- > > drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + > > 2 files changed, 10 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c > > index 83797c9479f7c..d905302f3f151 100644 > > --- a/drivers/gpu/drm/xe/xe_oa.c > > +++ b/drivers/gpu/drm/xe/xe_oa.c > > @@ -250,6 +250,11 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset) > > xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0); > > } > > > > +static bool mert_wa_14026633728(struct xe_oa_stream *s) > > +{ > > + return s->oa_unit->type == DRM_XE_OA_UNIT_TYPE_MERT && XE_GT_WA(s->gt, 14026633728); > > +} > > + > > static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) > > { > > u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); > > @@ -893,11 +898,14 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) > > > > static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) > > { > > + u32 vram = mert_wa_14026633728(stream) ? 
> > + XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(stream->oa->xe)) : > > + XE_BO_FLAG_SYSTEM; > > struct xe_bo *bo; > > > > bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile, > > size, ttm_bo_type_kernel, > > - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false); > > + vram | XE_BO_FLAG_GGTT, false); > > if (IS_ERR(bo)) > > return PTR_ERR(bo); > > > > diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules > > index f8a185103b805..a7c1bd9bcb943 100644 > > --- a/drivers/gpu/drm/xe/xe_wa_oob.rules > > +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules > > @@ -65,3 +65,4 @@ > > > > 14025883347 MEDIA_VERSION_RANGE(1301, 3503) > > GRAPHICS_VERSION_RANGE(2004, 3005) > > +14026633728 PLATFORM(CRESCENTISLAND) > > Is the underlying defect here tied to the graphics IP or the > platform/SoC? If graphics IP, this should key off the graphics version > instead of platform). If platform, we should treat it as a device > workaround rather than a GT workaround. I.e., definition in > xe_device_wa_oob.rules and use XE_DEVICE_WA() at the check sites. Because the WA is in MERT I'd think it would be a device WA. So I've changed it to a device WA as suggested here in v8. Thanks. -- Ashutosh ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v5 0/3] drm/xe/oa: Wa_14026633728 @ 2026-04-25 0:14 Ashutosh Dixit 2026-04-25 0:14 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 0 siblings, 1 reply; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-25 0:14 UTC (permalink / raw) To: intel-xe; +Cc: Umesh Nerlige Ramappa v5 though v2: Change to Patch 1 (see changelog in Patch 1) Ashutosh Dixit (3): drm/xe/oa: Use xe_map layer drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap drm/xe/oa: Implement Wa_14026633728 drivers/gpu/drm/xe/xe_oa.c | 123 ++++++++++++++++------------- drivers/gpu/drm/xe/xe_oa_types.h | 4 +- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 3 files changed, 70 insertions(+), 58 deletions(-) -- 2.54.0 ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 2026-04-25 0:14 [PATCH v5 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit @ 2026-04-25 0:14 ` Ashutosh Dixit 0 siblings, 0 replies; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-25 0:14 UTC (permalink / raw) To: intel-xe; +Cc: Umesh Nerlige Ramappa CRI Wa_14026633728 requires MERTOA buffer to be allocated in device memory, not system memory (which is the default for OA buffers). Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/xe/xe_oa.c | 10 +++++++++- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 5ccf63c807920..2a9b0d9884237 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -250,6 +250,11 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head) xe_map_wr(stream->oa->xe, map, head + 4, u32, 0); } +static bool mert_wa_14026633728(struct xe_oa_stream *s) +{ + return s->oa_unit->type == DRM_XE_OA_UNIT_TYPE_MERT && XE_GT_WA(s->gt, 14026633728); +} + static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -891,11 +896,14 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) { + u32 vram = mert_wa_14026633728(stream) ? 
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(stream->oa->xe)) : + XE_BO_FLAG_SYSTEM; struct xe_bo *bo; bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile, size, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false); + vram | XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index f8a185103b805..a7c1bd9bcb943 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -65,3 +65,4 @@ 14025883347 MEDIA_VERSION_RANGE(1301, 3503) GRAPHICS_VERSION_RANGE(2004, 3005) +14026633728 PLATFORM(CRESCENTISLAND) -- 2.54.0 ^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v4 0/3] drm/xe/oa: Wa_14026633728 @ 2026-04-15 2:03 Ashutosh Dixit 2026-04-15 2:03 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 0 siblings, 1 reply; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-15 2:03 UTC (permalink / raw) To: intel-xe; +Cc: Umesh Nerlige Ramappa v4: Change to Patch 1 again (see changelog in Patch 1) v3: Change to Patch 1 again (see changelog in Patch 1) v2: Change to Patch 1 Ashutosh Dixit (3): drm/xe/oa: Use xe_map layer drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap drm/xe/oa: Implement Wa_14026633728 drivers/gpu/drm/xe/xe_oa.c | 123 ++++++++++++++++------------- drivers/gpu/drm/xe/xe_oa_types.h | 4 +- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 3 files changed, 70 insertions(+), 58 deletions(-) -- 2.48.1 ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 2026-04-15 2:03 [PATCH v4 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit @ 2026-04-15 2:03 ` Ashutosh Dixit 0 siblings, 0 replies; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-15 2:03 UTC (permalink / raw) To: intel-xe; +Cc: Umesh Nerlige Ramappa CRI Wa_14026633728 requires MERTOA buffer to be allocated in device memory, not system memory (which is the default for OA buffers). Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/xe/xe_oa.c | 10 +++++++++- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 5b8a9bd26770c..6516e933b865c 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -250,6 +250,11 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head) xe_map_wr(stream->oa->xe, map, head + 1, u32, 0); } +static bool mert_wa_14026633728(struct xe_oa_stream *s) +{ + return s->oa_unit->type == DRM_XE_OA_UNIT_TYPE_MERT && XE_GT_WA(s->gt, 14026633728); +} + static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -891,11 +896,14 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) { + u32 vram = mert_wa_14026633728(stream) ? 
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(stream->oa->xe)) : + XE_BO_FLAG_SYSTEM; struct xe_bo *bo; bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile, size, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false); + vram | XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index f8a185103b805..a7c1bd9bcb943 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -65,3 +65,4 @@ 14025883347 MEDIA_VERSION_RANGE(1301, 3503) GRAPHICS_VERSION_RANGE(2004, 3005) +14026633728 PLATFORM(CRESCENTISLAND) -- 2.48.1 ^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v3 0/3] drm/xe/oa: Wa_14026633728 @ 2026-04-11 0:48 Ashutosh Dixit 2026-04-11 0:48 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 0 siblings, 1 reply; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-11 0:48 UTC (permalink / raw) To: intel-xe; +Cc: Umesh Nerlige Ramappa v3: Change to Patch 1 again (see changelog in Patch 1) v2: Change to Patch 1 Ashutosh Dixit (3): drm/xe/oa: Use xe_map layer drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap drm/xe/oa: Implement Wa_14026633728 drivers/gpu/drm/xe/xe_map.h | 14 ++++ drivers/gpu/drm/xe/xe_oa.c | 109 ++++++++++++++--------------- drivers/gpu/drm/xe/xe_oa_types.h | 3 - drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 4 files changed, 68 insertions(+), 59 deletions(-) -- 2.48.1 ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 2026-04-11 0:48 [PATCH v3 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit @ 2026-04-11 0:48 ` Ashutosh Dixit 0 siblings, 0 replies; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-11 0:48 UTC (permalink / raw) To: intel-xe; +Cc: Umesh Nerlige Ramappa CRI Wa_14026633728 requires MERTOA buffer to be allocated in device memory, not system memory (which is the default for OA buffers). Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/xe/xe_oa.c | 10 +++++++++- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 7ada6e2a0d955..7610b2d744066 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -250,6 +250,11 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head) xe_map_wr(stream->oa->xe, map, head + 1, u32, 0); } +static bool mert_wa_14026633728(struct xe_oa_stream *s) +{ + return s->oa_unit->type == DRM_XE_OA_UNIT_TYPE_MERT && XE_GT_WA(s->gt, 14026633728); +} + static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -884,11 +889,14 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) { + u32 vram = mert_wa_14026633728(stream) ? 
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(stream->oa->xe)) : + XE_BO_FLAG_SYSTEM; struct xe_bo *bo; bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile, size, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false); + vram | XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index f8a185103b805..a7c1bd9bcb943 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -65,3 +65,4 @@ 14025883347 MEDIA_VERSION_RANGE(1301, 3503) GRAPHICS_VERSION_RANGE(2004, 3005) +14026633728 PLATFORM(CRESCENTISLAND) -- 2.48.1 ^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v2 0/3] drm/xe/oa: Wa_14026633728 @ 2026-04-09 23:17 Ashutosh Dixit 2026-04-09 23:17 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 0 siblings, 1 reply; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-09 23:17 UTC (permalink / raw) To: intel-xe; +Cc: Umesh Nerlige Ramappa v2: Change to Patch 1 Ashutosh Dixit (3): drm/xe/oa: Use xe_map layer drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap drm/xe/oa: Implement Wa_14026633728 drivers/gpu/drm/xe/xe_oa.c | 127 ++++++++++++++++------------- drivers/gpu/drm/xe/xe_oa_types.h | 3 - drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 3 files changed, 72 insertions(+), 59 deletions(-) -- 2.48.1 ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 2026-04-09 23:17 [PATCH v2 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit @ 2026-04-09 23:17 ` Ashutosh Dixit 0 siblings, 0 replies; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-09 23:17 UTC (permalink / raw) To: intel-xe; +Cc: Umesh Nerlige Ramappa CRI Wa_14026633728 requires MERTOA buffer to be allocated in device memory, not system memory (which is the default for OA buffers). Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/xe/xe_oa.c | 10 +++++++++- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index b24aae055c7df..7b31cf0a29ce1 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -250,6 +250,11 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head) xe_map_wr(stream->oa->xe, map, head + 1, u32, 0); } +static bool mert_wa_14026633728(struct xe_oa_stream *s) +{ + return s->oa_unit->type == DRM_XE_OA_UNIT_TYPE_MERT && XE_GT_WA(s->gt, 14026633728); +} + static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -902,11 +907,14 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) { + u32 vram = mert_wa_14026633728(stream) ? 
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(stream->oa->xe)) : + XE_BO_FLAG_SYSTEM; struct xe_bo *bo; bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile, size, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false); + vram | XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index f8a185103b805..a7c1bd9bcb943 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -65,3 +65,4 @@ 14025883347 MEDIA_VERSION_RANGE(1301, 3503) GRAPHICS_VERSION_RANGE(2004, 3005) +14026633728 PLATFORM(CRESCENTISLAND) -- 2.48.1 ^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 0/3] drm/xe/oa: Wa_14026633728 @ 2026-04-07 3:02 Ashutosh Dixit 2026-04-07 3:02 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 0 siblings, 1 reply; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-07 3:02 UTC (permalink / raw) To: intel-xe; +Cc: Umesh Nerlige Ramappa Ashutosh Dixit (3): drm/xe/oa: Use xe_map layer drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap drm/xe/oa: Implement Wa_14026633728 drivers/gpu/drm/xe/xe_oa.c | 117 +++++++++++++++-------------- drivers/gpu/drm/xe/xe_oa_types.h | 3 - drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 3 files changed, 62 insertions(+), 59 deletions(-) -- 2.48.1 ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 2026-04-07 3:02 [PATCH 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit @ 2026-04-07 3:02 ` Ashutosh Dixit 2026-04-07 23:17 ` Umesh Nerlige Ramappa 0 siblings, 1 reply; 16+ messages in thread From: Ashutosh Dixit @ 2026-04-07 3:02 UTC (permalink / raw) To: intel-xe; +Cc: Umesh Nerlige Ramappa CRI Wa_14026633728 requires MERTOA buffer to be allocated in device memory, not system memory (which is the default for OA buffers). Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> --- drivers/gpu/drm/xe/xe_oa.c | 10 +++++++++- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 0e65141e57ee8..f65cec62a0d2d 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -250,6 +250,11 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head) xe_map_wr(stream->oa->xe, map, head + 1, u32, 0); } +static bool mert_wa_14026633728(struct xe_oa_stream *s) +{ + return s->oa_unit->type == DRM_XE_OA_UNIT_TYPE_MERT && XE_GT_WA(s->gt, 14026633728); +} + static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -892,11 +897,14 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) { + u32 vram = mert_wa_14026633728(stream) ? 
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(stream->oa->xe)) : + XE_BO_FLAG_SYSTEM; struct xe_bo *bo; bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile, size, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false); + vram | XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index f8a185103b805..a7c1bd9bcb943 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -65,3 +65,4 @@ 14025883347 MEDIA_VERSION_RANGE(1301, 3503) GRAPHICS_VERSION_RANGE(2004, 3005) +14026633728 PLATFORM(CRESCENTISLAND) -- 2.48.1 ^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 2026-04-07 3:02 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit @ 2026-04-07 23:17 ` Umesh Nerlige Ramappa 0 siblings, 0 replies; 16+ messages in thread From: Umesh Nerlige Ramappa @ 2026-04-07 23:17 UTC (permalink / raw) To: Ashutosh Dixit; +Cc: intel-xe On Mon, Apr 06, 2026 at 08:02:19PM -0700, Ashutosh Dixit wrote: >CRI Wa_14026633728 requires MERTOA buffer to be allocated in device memory, >not system memory (which is the default for OA buffers). > >Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> LGTM, Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> >--- > drivers/gpu/drm/xe/xe_oa.c | 10 +++++++++- > drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + > 2 files changed, 10 insertions(+), 1 deletion(-) > >diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c >index 0e65141e57ee8..f65cec62a0d2d 100644 >--- a/drivers/gpu/drm/xe/xe_oa.c >+++ b/drivers/gpu/drm/xe/xe_oa.c >@@ -250,6 +250,11 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head) > xe_map_wr(stream->oa->xe, map, head + 1, u32, 0); > } > >+static bool mert_wa_14026633728(struct xe_oa_stream *s) >+{ >+ return s->oa_unit->type == DRM_XE_OA_UNIT_TYPE_MERT && XE_GT_WA(s->gt, 14026633728); >+} >+ > static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) > { > u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); >@@ -892,11 +897,14 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) > > static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) > { >+ u32 vram = mert_wa_14026633728(stream) ? 
>+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(stream->oa->xe)) : >+ XE_BO_FLAG_SYSTEM; > struct xe_bo *bo; > > bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile, > size, ttm_bo_type_kernel, >- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false); >+ vram | XE_BO_FLAG_GGTT, false); > if (IS_ERR(bo)) > return PTR_ERR(bo); > >diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules >index f8a185103b805..a7c1bd9bcb943 100644 >--- a/drivers/gpu/drm/xe/xe_wa_oob.rules >+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules >@@ -65,3 +65,4 @@ > > 14025883347 MEDIA_VERSION_RANGE(1301, 3503) > GRAPHICS_VERSION_RANGE(2004, 3005) >+14026633728 PLATFORM(CRESCENTISLAND) >-- >2.48.1 > ^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2026-04-27 22:13 UTC | newest] Thread overview: 16+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2026-04-27 19:02 [PATCH v6 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit 2026-04-27 19:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit 2026-04-27 19:34 ` Umesh Nerlige Ramappa 2026-04-27 20:30 ` Dixit, Ashutosh 2026-04-27 19:02 ` [PATCH 2/3] drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap Ashutosh Dixit 2026-04-27 19:02 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit -- strict thread matches above, loose matches on Subject: below -- 2026-04-27 22:11 [PATCH v8 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit 2026-04-27 22:11 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 2026-04-27 20:26 [PATCH v7 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit 2026-04-27 20:26 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 2026-04-27 20:52 ` Matt Roper 2026-04-27 22:13 ` Dixit, Ashutosh 2026-04-25 0:14 [PATCH v5 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit 2026-04-25 0:14 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 2026-04-15 2:03 [PATCH v4 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit 2026-04-15 2:03 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 2026-04-11 0:48 [PATCH v3 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit 2026-04-11 0:48 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 2026-04-09 23:17 [PATCH v2 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit 2026-04-09 23:17 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 2026-04-07 3:02 [PATCH 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit 2026-04-07 3:02 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit 2026-04-07 23:17 ` Umesh Nerlige Ramappa
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox