* [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-07 3:02 [PATCH 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
@ 2026-04-07 3:02 ` Ashutosh Dixit
2026-04-07 22:49 ` Umesh Nerlige Ramappa
0 siblings, 1 reply; 25+ messages in thread
From: Ashutosh Dixit @ 2026-04-07 3:02 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
OA code should have used xe_map layer to begin with. In CRI, the OA buffer
can be located in either system or device memory. For these reasons, move OA
code to use the xe_map layer when accessing the OA buffer.
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 88 ++++++++++++++++++--------------
drivers/gpu/drm/xe/xe_oa_types.h | 3 --
2 files changed, 49 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6337e671c97ae..dfd3b6a789cc3 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
#define oa_report_header_64bit(__s) \
((__s)->oa_buffer.format->header == HDR_64_BIT)
-static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
+static u64 oa_report_id(struct xe_oa_stream *stream, u32 tail)
{
- return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ return oa_report_header_64bit(stream) ?
+ xe_map_rd(stream->oa->xe, map, tail, u64) :
+ xe_map_rd(stream->oa->xe, map, tail, u32);
}
-static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 head)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)report = 0;
- else
- *report = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, head, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, head, u32, 0);
}
-static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
+static u64 oa_timestamp(struct xe_oa_stream *stream, u32 tail)
{
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
return oa_report_header_64bit(stream) ?
- *((u64 *)report + 1) :
- *((u32 *)report + 1);
+ xe_map_rd(stream->oa->xe, map, tail + 2, u64) :
+ xe_map_rd(stream->oa->xe, map, tail + 1, u32);
}
-static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)&report[2] = 0;
- else
- report[1] = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, head + 2, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, head + 1, u32, 0);
}
static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
@@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
* they were written. If not : (╯°□°)╯︵ ┻━┻
*/
while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
- void *report = stream->oa_buffer.vaddr + tail;
-
- if (oa_report_id(stream, report) || oa_timestamp(stream, report))
+ if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
break;
tail = xe_oa_circ_diff(stream, tail, report_size);
@@ -311,30 +317,37 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
return HRTIMER_RESTART;
}
+static inline unsigned long
+xe_oa_copy_to_user(void __user *dst, const struct iosys_map *src, size_t src_offset, size_t len)
+{
+ if (src->is_iomem)
+ return copy_to_user(dst, src->vaddr_iomem + src_offset, len);
+ else
+ return copy_to_user(dst, src->vaddr + src_offset, len);
+}
+
static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
- size_t count, size_t *offset, const u8 *report)
+ size_t count, size_t *offset, u32 head)
{
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
int report_size = stream->oa_buffer.format->size;
int report_size_partial;
- u8 *oa_buf_end;
if ((count - *offset) < report_size)
return -ENOSPC;
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- report_size_partial = oa_buf_end - report;
+ report_size_partial = stream->oa_buffer.circ_size - head;
if (report_size_partial < report_size) {
- if (copy_to_user(buf, report, report_size_partial))
+ if (xe_oa_copy_to_user(buf, map, head, report_size_partial))
return -EFAULT;
buf += report_size_partial;
- if (copy_to_user(buf, stream->oa_buffer.vaddr,
- report_size - report_size_partial))
+ if (xe_oa_copy_to_user(buf, map, 0, report_size - report_size_partial))
return -EFAULT;
- } else if (copy_to_user(buf, report, report_size)) {
+ } else if (xe_oa_copy_to_user(buf, map, head, report_size)) {
return -EFAULT;
}
@@ -347,7 +360,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
size_t count, size_t *offset)
{
int report_size = stream->oa_buffer.format->size;
- u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
size_t start_offset = *offset;
unsigned long flags;
@@ -364,26 +376,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
for (; xe_oa_circ_diff(stream, tail, head);
head = xe_oa_circ_incr(stream, head, report_size)) {
- u8 *report = oa_buf_base + head;
-
- ret = xe_oa_append_report(stream, buf, count, offset, report);
+ ret = xe_oa_append_report(stream, buf, count, offset, head);
if (ret)
break;
if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
- oa_report_id_clear(stream, (void *)report);
- oa_timestamp_clear(stream, (void *)report);
+ oa_report_id_clear(stream, head);
+ oa_timestamp_clear(stream, head);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- u32 part = oa_buf_end - report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+ u32 part = stream->oa_buffer.circ_size - head;
/* Zero out the entire report */
if (report_size <= part) {
- memset(report, 0, report_size);
+ xe_map_memset(stream->oa->xe, map, head, 0, report_size);
} else {
- memset(report, 0, part);
- memset(oa_buf_base, 0, report_size - part);
+ xe_map_memset(stream->oa->xe, map, head, 0, part);
+ xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
}
}
}
@@ -436,7 +446,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/* Zero out the OA buffer since we rely on zero report id and timestamp fields */
- memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
+ xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
+ xe_bo_size(stream->oa_buffer.bo));
}
static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
@@ -891,7 +902,6 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
stream->oa_buffer.bo = bo;
/* mmap implementation requires OA buffer to be in system memory */
xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
- stream->oa_buffer.vaddr = bo->vmap.vaddr;
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index b03ffd5134834..2dd550c93b8ac 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -162,9 +162,6 @@ struct xe_oa_buffer {
/** @format: xe_bo backing the OA buffer */
struct xe_bo *bo;
- /** @vaddr: mapped vaddr of the OA buffer */
- u8 *vaddr;
-
/** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
spinlock_t ptr_lock;
--
2.48.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-07 3:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
@ 2026-04-07 22:49 ` Umesh Nerlige Ramappa
2026-04-08 15:32 ` Dixit, Ashutosh
0 siblings, 1 reply; 25+ messages in thread
From: Umesh Nerlige Ramappa @ 2026-04-07 22:49 UTC (permalink / raw)
To: Ashutosh Dixit; +Cc: intel-xe
On Mon, Apr 06, 2026 at 08:02:17PM -0700, Ashutosh Dixit wrote:
>OA code should have used xe_map layer to begin with. In CRI, the OA buffer
>can be located both in system and device memory. For these reasons, move OA
>code to use the xe_map layer when accessing the OA buffer.
>
>Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>---
> drivers/gpu/drm/xe/xe_oa.c | 88 ++++++++++++++++++--------------
> drivers/gpu/drm/xe/xe_oa_types.h | 3 --
> 2 files changed, 49 insertions(+), 42 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
>index 6337e671c97ae..dfd3b6a789cc3 100644
>--- a/drivers/gpu/drm/xe/xe_oa.c
>+++ b/drivers/gpu/drm/xe/xe_oa.c
>@@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
> #define oa_report_header_64bit(__s) \
> ((__s)->oa_buffer.format->header == HDR_64_BIT)
>
>-static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
>+static u64 oa_report_id(struct xe_oa_stream *stream, u32 tail)
> {
>- return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
>+ return oa_report_header_64bit(stream) ?
>+ xe_map_rd(stream->oa->xe, map, tail, u64) :
>+ xe_map_rd(stream->oa->xe, map, tail, u32);
> }
>
>-static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
>+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 head)
> {
>- if (oa_report_header_64bit(stream))
>- *(u64 *)report = 0;
>- else
>- *report = 0;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
>+ oa_report_header_64bit(stream) ?
>+ xe_map_wr(stream->oa->xe, map, head, u64, 0) :
>+ xe_map_wr(stream->oa->xe, map, head, u32, 0);
> }
>
>-static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
>+static u64 oa_timestamp(struct xe_oa_stream *stream, u32 tail)
> {
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
> return oa_report_header_64bit(stream) ?
>- *((u64 *)report + 1) :
>- *((u32 *)report + 1);
>+ xe_map_rd(stream->oa->xe, map, tail + 2, u64) :
>+ xe_map_rd(stream->oa->xe, map, tail + 1, u32);
> }
>
>-static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
>+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head)
> {
>- if (oa_report_header_64bit(stream))
>- *(u64 *)&report[2] = 0;
>- else
>- report[1] = 0;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
>+ oa_report_header_64bit(stream) ?
>+ xe_map_wr(stream->oa->xe, map, head + 2, u64, 0) :
>+ xe_map_wr(stream->oa->xe, map, head + 1, u32, 0);
> }
>
> static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
>@@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
> * they were written. If not : (╯°□°)╯︵ ┻━┻
> */
> while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
>- void *report = stream->oa_buffer.vaddr + tail;
>-
>- if (oa_report_id(stream, report) || oa_timestamp(stream, report))
>+ if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
> break;
>
> tail = xe_oa_circ_diff(stream, tail, report_size);
>@@ -311,30 +317,37 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
> return HRTIMER_RESTART;
> }
>
>+static inline unsigned long
>+xe_oa_copy_to_user(void __user *dst, const struct iosys_map *src, size_t src_offset, size_t len)
>+{
>+ if (src->is_iomem)
>+ return copy_to_user(dst, src->vaddr_iomem + src_offset, len);
I think this requires xe_map_memcpy_from for the iomem case.
Thanks,
Umesh
>+ else
>+ return copy_to_user(dst, src->vaddr + src_offset, len);
>+}
>+
> static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
>- size_t count, size_t *offset, const u8 *report)
>+ size_t count, size_t *offset, u32 head)
> {
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> int report_size = stream->oa_buffer.format->size;
> int report_size_partial;
>- u8 *oa_buf_end;
>
> if ((count - *offset) < report_size)
> return -ENOSPC;
>
> buf += *offset;
>
>- oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
>- report_size_partial = oa_buf_end - report;
>+ report_size_partial = stream->oa_buffer.circ_size - head;
>
> if (report_size_partial < report_size) {
>- if (copy_to_user(buf, report, report_size_partial))
>+ if (xe_oa_copy_to_user(buf, map, head, report_size_partial))
> return -EFAULT;
> buf += report_size_partial;
>
>- if (copy_to_user(buf, stream->oa_buffer.vaddr,
>- report_size - report_size_partial))
>+ if (xe_oa_copy_to_user(buf, map, 0, report_size - report_size_partial))
> return -EFAULT;
>- } else if (copy_to_user(buf, report, report_size)) {
>+ } else if (xe_oa_copy_to_user(buf, map, head, report_size)) {
> return -EFAULT;
> }
>
>@@ -347,7 +360,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> size_t count, size_t *offset)
> {
> int report_size = stream->oa_buffer.format->size;
>- u8 *oa_buf_base = stream->oa_buffer.vaddr;
> u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
> size_t start_offset = *offset;
> unsigned long flags;
>@@ -364,26 +376,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
>
> for (; xe_oa_circ_diff(stream, tail, head);
> head = xe_oa_circ_incr(stream, head, report_size)) {
>- u8 *report = oa_buf_base + head;
>-
>- ret = xe_oa_append_report(stream, buf, count, offset, report);
>+ ret = xe_oa_append_report(stream, buf, count, offset, head);
> if (ret)
> break;
>
> if (!(stream->oa_buffer.circ_size % report_size)) {
> /* Clear out report id and timestamp to detect unlanded reports */
>- oa_report_id_clear(stream, (void *)report);
>- oa_timestamp_clear(stream, (void *)report);
>+ oa_report_id_clear(stream, head);
>+ oa_timestamp_clear(stream, head);
> } else {
>- u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
>- u32 part = oa_buf_end - report;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+ u32 part = stream->oa_buffer.circ_size - head;
>
> /* Zero out the entire report */
> if (report_size <= part) {
>- memset(report, 0, report_size);
>+ xe_map_memset(stream->oa->xe, map, head, 0, report_size);
> } else {
>- memset(report, 0, part);
>- memset(oa_buf_base, 0, report_size - part);
>+ xe_map_memset(stream->oa->xe, map, head, 0, part);
>+ xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
> }
> }
> }
>@@ -436,7 +446,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
> spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> /* Zero out the OA buffer since we rely on zero report id and timestamp fields */
>- memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
>+ xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
>+ xe_bo_size(stream->oa_buffer.bo));
> }
>
> static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
>@@ -891,7 +902,6 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
> stream->oa_buffer.bo = bo;
> /* mmap implementation requires OA buffer to be in system memory */
> xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
>- stream->oa_buffer.vaddr = bo->vmap.vaddr;
> return 0;
> }
>
>diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
>index b03ffd5134834..2dd550c93b8ac 100644
>--- a/drivers/gpu/drm/xe/xe_oa_types.h
>+++ b/drivers/gpu/drm/xe/xe_oa_types.h
>@@ -162,9 +162,6 @@ struct xe_oa_buffer {
> /** @format: xe_bo backing the OA buffer */
> struct xe_bo *bo;
>
>- /** @vaddr: mapped vaddr of the OA buffer */
>- u8 *vaddr;
>-
> /** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
> spinlock_t ptr_lock;
>
>--
>2.48.1
>
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-07 22:49 ` Umesh Nerlige Ramappa
@ 2026-04-08 15:32 ` Dixit, Ashutosh
2026-04-08 18:21 ` Umesh Nerlige Ramappa
0 siblings, 1 reply; 25+ messages in thread
From: Dixit, Ashutosh @ 2026-04-08 15:32 UTC (permalink / raw)
To: Umesh Nerlige Ramappa; +Cc: intel-xe
On Tue, 07 Apr 2026 15:49:27 -0700, Umesh Nerlige Ramappa wrote:
>
Hi Umesh,
> On Mon, Apr 06, 2026 at 08:02:17PM -0700, Ashutosh Dixit wrote:
> > @@ -311,30 +317,37 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
> > return HRTIMER_RESTART;
> > }
> >
> > +static inline unsigned long
> > +xe_oa_copy_to_user(void __user *dst, const struct iosys_map *src, size_t src_offset, size_t len)
> > +{
> > + if (src->is_iomem)
> > + return copy_to_user(dst, src->vaddr_iomem + src_offset, len);
>
> I think this requires xe_map_memcpy_from for the iomem case.
>
> > + else
> > + return copy_to_user(dst, src->vaddr + src_offset, len);
> > +}
> > +
Yes, you have certainly spotted the iffy part of this patch. However, this
situation is not that simple:
* xe_map_memcpy_from or iosys_map_memcpy_from copies from iomem to kernel
memory (potentially using special accessors for iomem), not to user
memory
* copy_to_user copies from kernel memory to user memory (using special
checks), not from iomem
So in the kernel we don't have functions which copy directly from iomem to
user memory. That is, functions like iosys_map_copy_to_user() or
copy_to_user_from_iomem() do not exist, because they would need to combine
iomem access and copy_to_user (likely copy_to_user will need to be
rewritten with iomem accessors, if such a function were to be
implemented). There may be a reason why such functions don't exist in the
kernel.
Another way might be to copy from iomem to kernel memory and from kernel to
user memory (that is have both iosys_map_memcpy_from and copy_to_user
calls), but this would entail an extra copy into a bounce buffer in kernel
memory. Something like this:
static inline unsigned long
xe_oa_copy_to_user(void __user *dst, const struct iosys_map *src, size_t src_offset, size_t len)
{
void *tmp = kmalloc(len, GFP_KERNEL); // kmalloc or statically allocated tmp buffer
xe_map_memcpy_from(xe, tmp, src, src_offset, len); // extra copy
return copy_to_user(dst, tmp, len);
}
In our case, xe_map_memcpy_from or iosys_map_memcpy_from are basically a
memcpy (because vram bar is ioremapped, special accessors for iomem are not
needed). Therefore, to avoid the extra copy, I decided to drop the iomem
access part (since the copy_to_user part cannot be dropped). Hence I end up
with the xe_oa_copy_to_user() above. The function was tested on BMG with OA
buffer in vram and works as expected.
I did think about renaming the above function xe_map_copy_to_user() and
moving it to xe_map.h, so that is possible. But since there are no other
users (and none are expected) at this point of xe_map_copy_to_user() apart
from OA because of this workaround (other observation stream buffers are in
system memory, not vram), I left the function as specific to OA.
Let me know what you think. Thanks.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-08 15:32 ` Dixit, Ashutosh
@ 2026-04-08 18:21 ` Umesh Nerlige Ramappa
0 siblings, 0 replies; 25+ messages in thread
From: Umesh Nerlige Ramappa @ 2026-04-08 18:21 UTC (permalink / raw)
To: Dixit, Ashutosh; +Cc: intel-xe
On Wed, Apr 08, 2026 at 08:32:50AM -0700, Dixit, Ashutosh wrote:
>On Tue, 07 Apr 2026 15:49:27 -0700, Umesh Nerlige Ramappa wrote:
>>
>
>Hi Umesh,
>
>> On Mon, Apr 06, 2026 at 08:02:17PM -0700, Ashutosh Dixit wrote:
>> > @@ -311,30 +317,37 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
>> > return HRTIMER_RESTART;
>> > }
>> >
>> > +static inline unsigned long
>> > +xe_oa_copy_to_user(void __user *dst, const struct iosys_map *src, size_t src_offset, size_t len)
>> > +{
>> > + if (src->is_iomem)
>> > + return copy_to_user(dst, src->vaddr_iomem + src_offset, len);
>>
>> I think this requires xe_map_memcpy_from for the iomem case.
>>
>> > + else
>> > + return copy_to_user(dst, src->vaddr + src_offset, len);
>> > +}
>> > +
>
>Yes, you have certainly spotted the iffy part of this patch. However, this
>situation is not that simple:
>
>* xe_map_memcpy_from or iosys_map_memcpy_from copies from iomem to kernel
> memory (potentially using special accessors for iomem), not to user
> memory
>
>* copy_to_user copies from kernel memory to user memory (using special
> checks), not from iomem
>
>So in the kernel we don't have functions which copy directly from iomem to
>user memory. That is, functions like iosys_map_copy_to_user() or
>copy_to_user_from_iomem() do not exist, because they would need to combine
>iomem access and copy_to_user (likely copy_to_user will need to be
>rewritten with iomem accessors, if such a function were to be
>implemented). There may be a reason why such functions don't exist in the
>kernel.
>
>Another way might be to copy from iomem to kernel memory and from kernel to
>user memory (that is have both iosys_map_memcpy_from and copy_to_user
>calls), but this would entail an extra copy into a bounce buffer in kernel
>memory. Something like this:
>
>static inline unsigned long
>xe_oa_copy_to_user(void __user *dst, const struct iosys_map *src, size_t src_offset, size_t len)
>{
> void *tmp = kmalloc(len, GFP_KERNEL); // kmalloc or statically allocated tmp buffer
>
> xe_map_memcpy_from(xe, tmp, src, src_offset, len); // extra copy
>
> return copy_to_user(dst, tmp, len);
>}
I did notice that there wasn't anything like copy_from_iomem_to_user. I
was imagining you could copy over 32-bit iomem words into the user
buffer using the iosys_map accessors. That should be possible and would
avoid an intermediate buffer.
>
>In our case, xe_map_memcpy_from or iosys_map_memcpy_from are basically a
>memcpy (because vram bar is ioremapped, special accessors for iomem are not
>needed).
Hmm, then why even have this patch :). fwiu, you are accessing the OA
buffer using iosys_map accessors in this patch. It should work without
it because of the above statement.
If we go the iosys_map route, then let's do it consistently. If not, we
should just drop this patch. I would lean into reading 32bit words into
the user buffer.
Regards,
Umesh
>Therefore, to avoid the extra copy, I decided to drop the iomem
>access part (since the copy_to_user part cannot be dropped). Hence I end up
>with the xe_oa_copy_to_user() above. The function was tested on BMG with OA
>buffer in vram and works as expected.
>
>I did think about renaming the above function xe_map_copy_to_user() and
>moving it to xe_map.h, so that is possible. But since there are no other
>users (and none are expected) at this point of xe_map_copy_to_user() apart
>from OA because of this workaround (other observation stream buffers are in
>system memory, not vram), I left the function as specific to OA.
>
>Let me know what you think. Thanks.
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-09 23:17 [PATCH v2 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
@ 2026-04-09 23:17 ` Ashutosh Dixit
2026-04-11 0:49 ` Dixit, Ashutosh
0 siblings, 1 reply; 25+ messages in thread
From: Ashutosh Dixit @ 2026-04-09 23:17 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
OA code should have used xe_map layer to begin with. In CRI, the OA buffer
can be located in either system or device memory. For these reasons, move OA
code to use the xe_map layer when accessing the OA buffer.
v2: Use xe_map layer also in xe_oa_copy_to_user (Umesh)
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 98 +++++++++++++++++++-------------
drivers/gpu/drm/xe/xe_oa_types.h | 3 -
2 files changed, 59 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6337e671c97ae..ff2ddec8ffa65 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
#define oa_report_header_64bit(__s) \
((__s)->oa_buffer.format->header == HDR_64_BIT)
-static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
+static u64 oa_report_id(struct xe_oa_stream *stream, u32 tail)
{
- return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ return oa_report_header_64bit(stream) ?
+ xe_map_rd(stream->oa->xe, map, tail, u64) :
+ xe_map_rd(stream->oa->xe, map, tail, u32);
}
-static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 head)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)report = 0;
- else
- *report = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, head, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, head, u32, 0);
}
-static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
+static u64 oa_timestamp(struct xe_oa_stream *stream, u32 tail)
{
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
return oa_report_header_64bit(stream) ?
- *((u64 *)report + 1) :
- *((u32 *)report + 1);
+ xe_map_rd(stream->oa->xe, map, tail + 2, u64) :
+ xe_map_rd(stream->oa->xe, map, tail + 1, u32);
}
-static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)&report[2] = 0;
- else
- report[1] = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, head + 2, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, head + 1, u32, 0);
}
static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
@@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
* they were written. If not : (╯°□°)╯︵ ┻━┻
*/
while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
- void *report = stream->oa_buffer.vaddr + tail;
-
- if (oa_report_id(stream, report) || oa_timestamp(stream, report))
+ if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
break;
tail = xe_oa_circ_diff(stream, tail, report_size);
@@ -311,30 +317,47 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
return HRTIMER_RESTART;
}
+/* Because there is no iosys_map_copy_to_user() or copy_to_user_from_iomem() */
+static int xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 head, u32 len)
+{
+ u32 __user *dptr = dst;
+
+ xe_gt_assert(stream->gt, len % 4 == 0);
+
+ for (int i = 0; i < len / 4; i++) {
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+ u32 val = xe_map_rd(stream->oa->xe, map, head, u32);
+ int ret = put_user(val, dptr++);
+
+ if (ret)
+ return ret;
+ head += 4;
+ }
+
+ return 0;
+}
+
static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
- size_t count, size_t *offset, const u8 *report)
+ size_t count, size_t *offset, u32 head)
{
int report_size = stream->oa_buffer.format->size;
int report_size_partial;
- u8 *oa_buf_end;
if ((count - *offset) < report_size)
return -ENOSPC;
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- report_size_partial = oa_buf_end - report;
+ report_size_partial = stream->oa_buffer.circ_size - head;
if (report_size_partial < report_size) {
- if (copy_to_user(buf, report, report_size_partial))
+ if (xe_oa_copy_to_user(stream, buf, head, report_size_partial))
return -EFAULT;
buf += report_size_partial;
- if (copy_to_user(buf, stream->oa_buffer.vaddr,
- report_size - report_size_partial))
+ if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
return -EFAULT;
- } else if (copy_to_user(buf, report, report_size)) {
+ } else if (xe_oa_copy_to_user(stream, buf, head, report_size)) {
return -EFAULT;
}
@@ -347,7 +370,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
size_t count, size_t *offset)
{
int report_size = stream->oa_buffer.format->size;
- u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
size_t start_offset = *offset;
unsigned long flags;
@@ -364,26 +386,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
for (; xe_oa_circ_diff(stream, tail, head);
head = xe_oa_circ_incr(stream, head, report_size)) {
- u8 *report = oa_buf_base + head;
-
- ret = xe_oa_append_report(stream, buf, count, offset, report);
+ ret = xe_oa_append_report(stream, buf, count, offset, head);
if (ret)
break;
if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
- oa_report_id_clear(stream, (void *)report);
- oa_timestamp_clear(stream, (void *)report);
+ oa_report_id_clear(stream, head);
+ oa_timestamp_clear(stream, head);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- u32 part = oa_buf_end - report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+ u32 part = stream->oa_buffer.circ_size - head;
/* Zero out the entire report */
if (report_size <= part) {
- memset(report, 0, report_size);
+ xe_map_memset(stream->oa->xe, map, head, 0, report_size);
} else {
- memset(report, 0, part);
- memset(oa_buf_base, 0, report_size - part);
+ xe_map_memset(stream->oa->xe, map, head, 0, part);
+ xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
}
}
}
@@ -436,7 +456,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/* Zero out the OA buffer since we rely on zero report id and timestamp fields */
- memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
+ xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
+ xe_bo_size(stream->oa_buffer.bo));
}
static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
@@ -891,7 +912,6 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
stream->oa_buffer.bo = bo;
/* mmap implementation requires OA buffer to be in system memory */
xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
- stream->oa_buffer.vaddr = bo->vmap.vaddr;
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index b03ffd5134834..2dd550c93b8ac 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -162,9 +162,6 @@ struct xe_oa_buffer {
/** @format: xe_bo backing the OA buffer */
struct xe_bo *bo;
- /** @vaddr: mapped vaddr of the OA buffer */
- u8 *vaddr;
-
/** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
spinlock_t ptr_lock;
--
2.48.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-11 0:48 [PATCH v3 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
@ 2026-04-11 0:48 ` Ashutosh Dixit
0 siblings, 0 replies; 25+ messages in thread
From: Ashutosh Dixit @ 2026-04-11 0:48 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
OA code should have used xe_map layer to begin with. In CRI, the OA buffer
can be located in either system or device memory. For these reasons, move OA
code to use the xe_map layer when accessing the OA buffer.
v2: Use xe_map layer also in xe_oa_copy_to_user (Umesh)
v3: To avoid performance impact in v2, revert to v1 but move
xe_map_copy_to_user() to xe_map.h
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_map.h | 14 ++++++
drivers/gpu/drm/xe/xe_oa.c | 80 ++++++++++++++++----------------
drivers/gpu/drm/xe/xe_oa_types.h | 3 --
3 files changed, 55 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h
index c447771256913..8007fcb342880 100644
--- a/drivers/gpu/drm/xe/xe_map.h
+++ b/drivers/gpu/drm/xe/xe_map.h
@@ -44,6 +44,20 @@ static inline void xe_map_memset(struct xe_device *xe,
iosys_map_memset(dst, offset, value, len);
}
+static inline unsigned long
+xe_map_copy_to_user(struct xe_device *xe, void __user *dst,
+ const struct iosys_map *src,
+ size_t src_offset, size_t len)
+{
+ xe_device_assert_mem_access(xe);
+
+ /* Because there is no iosys_map_copy_to_user() or copy_to_user_from_iomem() */
+ if (src->is_iomem)
+ return copy_to_user(dst, src->vaddr_iomem + src_offset, len);
+ else
+ return copy_to_user(dst, src->vaddr + src_offset, len);
+}
+
/* FIXME: We likely should kill these two functions sooner or later */
static inline u32 xe_map_read32(struct xe_device *xe, struct iosys_map *map)
{
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6337e671c97ae..d25beb6ac6040 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
#define oa_report_header_64bit(__s) \
((__s)->oa_buffer.format->header == HDR_64_BIT)
-static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
+static u64 oa_report_id(struct xe_oa_stream *stream, u32 tail)
{
- return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ return oa_report_header_64bit(stream) ?
+ xe_map_rd(stream->oa->xe, map, tail, u64) :
+ xe_map_rd(stream->oa->xe, map, tail, u32);
}
-static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 head)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)report = 0;
- else
- *report = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, head, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, head, u32, 0);
}
-static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
+static u64 oa_timestamp(struct xe_oa_stream *stream, u32 tail)
{
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
return oa_report_header_64bit(stream) ?
- *((u64 *)report + 1) :
- *((u32 *)report + 1);
+ xe_map_rd(stream->oa->xe, map, tail + 2, u64) :
+ xe_map_rd(stream->oa->xe, map, tail + 1, u32);
}
-static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)&report[2] = 0;
- else
- report[1] = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, head + 2, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, head + 1, u32, 0);
}
static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
@@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
* they were written. If not : (╯°□°)╯︵ ┻━┻
*/
while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
- void *report = stream->oa_buffer.vaddr + tail;
-
- if (oa_report_id(stream, report) || oa_timestamp(stream, report))
+ if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
break;
tail = xe_oa_circ_diff(stream, tail, report_size);
@@ -312,29 +318,28 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
}
static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
- size_t count, size_t *offset, const u8 *report)
+ size_t count, size_t *offset, u32 head)
{
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
int report_size = stream->oa_buffer.format->size;
+ struct xe_device *xe = stream->oa->xe;
int report_size_partial;
- u8 *oa_buf_end;
if ((count - *offset) < report_size)
return -ENOSPC;
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- report_size_partial = oa_buf_end - report;
+ report_size_partial = stream->oa_buffer.circ_size - head;
if (report_size_partial < report_size) {
- if (copy_to_user(buf, report, report_size_partial))
+ if (xe_map_copy_to_user(xe, buf, map, head, report_size_partial))
return -EFAULT;
buf += report_size_partial;
- if (copy_to_user(buf, stream->oa_buffer.vaddr,
- report_size - report_size_partial))
+ if (xe_map_copy_to_user(xe, buf, map, 0, report_size - report_size_partial))
return -EFAULT;
- } else if (copy_to_user(buf, report, report_size)) {
+ } else if (xe_map_copy_to_user(xe, buf, map, head, report_size)) {
return -EFAULT;
}
@@ -347,7 +352,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
size_t count, size_t *offset)
{
int report_size = stream->oa_buffer.format->size;
- u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
size_t start_offset = *offset;
unsigned long flags;
@@ -364,26 +368,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
for (; xe_oa_circ_diff(stream, tail, head);
head = xe_oa_circ_incr(stream, head, report_size)) {
- u8 *report = oa_buf_base + head;
-
- ret = xe_oa_append_report(stream, buf, count, offset, report);
+ ret = xe_oa_append_report(stream, buf, count, offset, head);
if (ret)
break;
if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
- oa_report_id_clear(stream, (void *)report);
- oa_timestamp_clear(stream, (void *)report);
+ oa_report_id_clear(stream, head);
+ oa_timestamp_clear(stream, head);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- u32 part = oa_buf_end - report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+ u32 part = stream->oa_buffer.circ_size - head;
/* Zero out the entire report */
if (report_size <= part) {
- memset(report, 0, report_size);
+ xe_map_memset(stream->oa->xe, map, head, 0, report_size);
} else {
- memset(report, 0, part);
- memset(oa_buf_base, 0, report_size - part);
+ xe_map_memset(stream->oa->xe, map, head, 0, part);
+ xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
}
}
}
@@ -436,7 +438,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/* Zero out the OA buffer since we rely on zero report id and timestamp fields */
- memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
+ xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
+ xe_bo_size(stream->oa_buffer.bo));
}
static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
@@ -891,7 +894,6 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
stream->oa_buffer.bo = bo;
/* mmap implementation requires OA buffer to be in system memory */
xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
- stream->oa_buffer.vaddr = bo->vmap.vaddr;
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index b03ffd5134834..2dd550c93b8ac 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -162,9 +162,6 @@ struct xe_oa_buffer {
/** @format: xe_bo backing the OA buffer */
struct xe_bo *bo;
- /** @vaddr: mapped vaddr of the OA buffer */
- u8 *vaddr;
-
/** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
spinlock_t ptr_lock;
--
2.48.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-09 23:17 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
@ 2026-04-11 0:49 ` Dixit, Ashutosh
2026-04-13 18:05 ` Umesh Nerlige Ramappa
0 siblings, 1 reply; 25+ messages in thread
From: Dixit, Ashutosh @ 2026-04-11 0:49 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
On Thu, 09 Apr 2026 16:17:52 -0700, Ashutosh Dixit wrote:
>
Hi Umesh,
> +/* Because there is no iosys_map_copy_to_user() or copy_to_user_from_iomem() */
> +static int xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 head, u32 len)
> +{
> + u32 __user *dptr = dst;
> +
> + xe_gt_assert(stream->gt, len % 4 == 0);
> +
> + for (int i = 0; i < len / 4; i++) {
> + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> + u32 val = xe_map_rd(stream->oa->xe, map, head, u32);
> + int ret = put_user(val, dptr++);
> +
> + if (ret)
> + return ret;
> + head += 4;
> + }
> +
> + return 0;
> +}
Unfortunately, with this copy_to_user() implementation we are seeing buffer
overflow, implying there is significant performance impact, even when OA
buffer is in system memory:
Starting subtest: non-zero-reason
Starting dynamic subtest: oag-0
(xe_oa:6798) CRITICAL: Test assertion failure function test_non_zero_reason, file ../tests/intel/xe_oa.c:2693:
(xe_oa:6798) CRITICAL: Failed assertion: !(oa_status & DRM_XE_OASTATUS_BUFFER_OVERFLOW)
That is, we are unable to read data at the rate at which HW is writing
data, causing buffer overrun.
Therefore in v3 I have basically reverted to the copy_to_user()
implementation of v1, except that I have taken the copy_to_user() out of
xe_oa.c and moved it to xe_map.h, to make that implementation "official"
(and OA code to strictly use the xe_map layer). This seems to be the best
approach to me, it avoids unnecessary copies and works correctly on all of
our systems of interest.
Also, the previous code, without this patch, is of course doing this exact
same thing, except that the use of the xe_map layer makes these assumptions
explicit.
If you still don't think we should be doing this, I can try to get a second
opinion about this patch. Thanks.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-11 0:49 ` Dixit, Ashutosh
@ 2026-04-13 18:05 ` Umesh Nerlige Ramappa
2026-04-13 22:06 ` Dixit, Ashutosh
0 siblings, 1 reply; 25+ messages in thread
From: Umesh Nerlige Ramappa @ 2026-04-13 18:05 UTC (permalink / raw)
To: Dixit, Ashutosh; +Cc: intel-xe
On Fri, Apr 10, 2026 at 05:49:03PM -0700, Dixit, Ashutosh wrote:
>On Thu, 09 Apr 2026 16:17:52 -0700, Ashutosh Dixit wrote:
>>
>
>Hi Umesh,
>
>> +/* Because there is no iosys_map_copy_to_user() or copy_to_user_from_iomem() */
>> +static int xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 head, u32 len)
>> +{
>> + u32 __user *dptr = dst;
>> +
>> + xe_gt_assert(stream->gt, len % 4 == 0);
>> +
>> + for (int i = 0; i < len / 4; i++) {
>> + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>> + u32 val = xe_map_rd(stream->oa->xe, map, head, u32);
>> + int ret = put_user(val, dptr++);
>> +
>> + if (ret)
>> + return ret;
>> + head += 4;
>> + }
>> +
>> + return 0;
>> +}
>
>Unfortunately, with this copy_to_user() implementation we are seeing buffer
>overflow, implying there is significant performance impact, even when OA
>buffer is in system memory:
>
> Starting subtest: non-zero-reason
> Starting dynamic subtest: oag-0
> (xe_oa:6798) CRITICAL: Test assertion failure function test_non_zero_reason, file ../tests/intel/xe_oa.c:2693:
> (xe_oa:6798) CRITICAL: Failed assertion: !(oa_status & DRM_XE_OASTATUS_BUFFER_OVERFLOW)
>
>That is, we are unable to read data at the rate at which HW is writing
>data, causing buffer overrun.
>
>Therefore in v3 I have basically reverted to the copy_to_user()
>implementation of v1, except that I have taken the copy_to_user() out of
>xe_oa.c and moved it to xe_map.h, to make that implementation "official"
>(and OA code to strictly use the xe_map layer). This seems to be the best
>approach to me, it avoids unnecessary copies and works correctly on all of
>our systems of interest.
>
>Also, the previous code, without this patch, is of course doing this exact
>same thing, except that the use of the xe_map layer make these assumptions
>explicit.
>
>If you still don't think we should be doing this, I can try to get a second
>opinion about this patch. Thanks.
Yes, please include someone who can give us an Ack or some inputs on
this approach because we are hitting some issues.
Thanks,
Umesh
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-13 18:05 ` Umesh Nerlige Ramappa
@ 2026-04-13 22:06 ` Dixit, Ashutosh
0 siblings, 0 replies; 25+ messages in thread
From: Dixit, Ashutosh @ 2026-04-13 22:06 UTC (permalink / raw)
To: Umesh Nerlige Ramappa; +Cc: intel-xe
On Mon, 13 Apr 2026 11:05:09 -0700, Umesh Nerlige Ramappa wrote:
>
> On Fri, Apr 10, 2026 at 05:49:03PM -0700, Dixit, Ashutosh wrote:
> > On Thu, 09 Apr 2026 16:17:52 -0700, Ashutosh Dixit wrote:
> >>
> >
> > Hi Umesh,
> >
> >> +/* Because there is no iosys_map_copy_to_user() or copy_to_user_from_iomem() */
> >> +static int xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 head, u32 len)
> >> +{
> >> + u32 __user *dptr = dst;
> >> +
> >> + xe_gt_assert(stream->gt, len % 4 == 0);
> >> +
> >> + for (int i = 0; i < len / 4; i++) {
> >> + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> >> + u32 val = xe_map_rd(stream->oa->xe, map, head, u32);
> >> + int ret = put_user(val, dptr++);
> >> +
> >> + if (ret)
> >> + return ret;
> >> + head += 4;
> >> + }
> >> +
> >> + return 0;
> >> +}
> >
> > Unfortunately, with this copy_to_user() implementation we are seeing buffer
> > overflow, implying there is significant performance impact, even when OA
> > buffer is in system memory:
> >
> > Starting subtest: non-zero-reason
> > Starting dynamic subtest: oag-0
> > (xe_oa:6798) CRITICAL: Test assertion failure function test_non_zero_reason, file ../tests/intel/xe_oa.c:2693:
> > (xe_oa:6798) CRITICAL: Failed assertion: !(oa_status & DRM_XE_OASTATUS_BUFFER_OVERFLOW)
> >
> > That is, we are unable to read data at the rate at which HW is writing
> > data, causing buffer overrun.
> >
> > Therefore in v3 I have basically reverted to the copy_to_user()
> > implementation of v1, except that I have taken the copy_to_user() out of
> > xe_oa.c and moved it to xe_map.h, to make that implementation "official"
> > (and OA code to strictly use the xe_map layer). This seems to be the best
> > approach to me, it avoids unnecessary copies and works correctly on all of
> > our systems of interest.
> >
> > Also, the previous code, without this patch, is of course doing this exact
> > same thing, except that the use of the xe_map layer make these assumptions
> > explicit.
> >
> > If you still don't think we should be doing this, I can try to get a second
> > opinion about this patch. Thanks.
>
> Yes, please include someone who can give us an Ack or some inputs on this
> approach because we are hitting some issues.
I have asked. What I posted in v3 is my preferred option since it avoids the
extra copy. But if that doesn't go through, the function below has the extra
copy, but seems to have the same performance as v3, so that is also an option:
static int xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 head, u32 len)
{
struct iosys_map *map = &stream->oa_buffer.bo->vmap;
u8 buf[1024]; // Allocate as part of stream
xe_map_memcpy_from(xe, buf, map, head, len);
return copy_to_user(dst, buf, len);
}
So I am planning to try this next, if v3 doesn't go through. Thanks.
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-15 2:03 [PATCH v4 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
@ 2026-04-15 2:03 ` Ashutosh Dixit
2026-04-24 21:05 ` Umesh Nerlige Ramappa
0 siblings, 1 reply; 25+ messages in thread
From: Ashutosh Dixit @ 2026-04-15 2:03 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
OA code should have used xe_map layer to begin with. In CRI, the OA buffer
can be located both in system and device memory. For these reasons, move OA
code to use the xe_map layer when accessing the OA buffer.
v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
v3: To avoid performance impact in v2, revert to v1 but move
xe_map_copy_to_user() to xe_map.h
v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 95 +++++++++++++++++++-------------
drivers/gpu/drm/xe/xe_oa_types.h | 4 +-
2 files changed, 58 insertions(+), 41 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6337e671c97ae..4d13d8ef8103f 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
#define oa_report_header_64bit(__s) \
((__s)->oa_buffer.format->header == HDR_64_BIT)
-static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
+static u64 oa_report_id(struct xe_oa_stream *stream, u32 tail)
{
- return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ return oa_report_header_64bit(stream) ?
+ xe_map_rd(stream->oa->xe, map, tail, u64) :
+ xe_map_rd(stream->oa->xe, map, tail, u32);
}
-static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 head)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)report = 0;
- else
- *report = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, head, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, head, u32, 0);
}
-static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
+static u64 oa_timestamp(struct xe_oa_stream *stream, u32 tail)
{
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
return oa_report_header_64bit(stream) ?
- *((u64 *)report + 1) :
- *((u32 *)report + 1);
+ xe_map_rd(stream->oa->xe, map, tail + 2, u64) :
+ xe_map_rd(stream->oa->xe, map, tail + 1, u32);
}
-static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)&report[2] = 0;
- else
- report[1] = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, head + 2, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, head + 1, u32, 0);
}
static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
@@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
* they were written. If not : (╯°□°)╯︵ ┻━┻
*/
while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
- void *report = stream->oa_buffer.vaddr + tail;
-
- if (oa_report_id(stream, report) || oa_timestamp(stream, report))
+ if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
break;
tail = xe_oa_circ_diff(stream, tail, report_size);
@@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
return HRTIMER_RESTART;
}
+static unsigned long
+xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 head, u32 len)
+{
+ xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
+ &stream->oa_buffer.bo->vmap, head, len);
+ return copy_to_user(dst, stream->oa_buffer.bounce, len);
+}
+
static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
- size_t count, size_t *offset, const u8 *report)
+ size_t count, size_t *offset, u32 head)
{
int report_size = stream->oa_buffer.format->size;
int report_size_partial;
- u8 *oa_buf_end;
if ((count - *offset) < report_size)
return -ENOSPC;
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- report_size_partial = oa_buf_end - report;
+ report_size_partial = stream->oa_buffer.circ_size - head;
if (report_size_partial < report_size) {
- if (copy_to_user(buf, report, report_size_partial))
+ if (xe_oa_copy_to_user(stream, buf, head, report_size_partial))
return -EFAULT;
buf += report_size_partial;
- if (copy_to_user(buf, stream->oa_buffer.vaddr,
- report_size - report_size_partial))
+ if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
return -EFAULT;
- } else if (copy_to_user(buf, report, report_size)) {
+ } else if (xe_oa_copy_to_user(stream, buf, head, report_size)) {
return -EFAULT;
}
@@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
size_t count, size_t *offset)
{
int report_size = stream->oa_buffer.format->size;
- u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
size_t start_offset = *offset;
unsigned long flags;
@@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
for (; xe_oa_circ_diff(stream, tail, head);
head = xe_oa_circ_incr(stream, head, report_size)) {
- u8 *report = oa_buf_base + head;
-
- ret = xe_oa_append_report(stream, buf, count, offset, report);
+ ret = xe_oa_append_report(stream, buf, count, offset, head);
if (ret)
break;
if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
- oa_report_id_clear(stream, (void *)report);
- oa_timestamp_clear(stream, (void *)report);
+ oa_report_id_clear(stream, head);
+ oa_timestamp_clear(stream, head);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- u32 part = oa_buf_end - report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+ u32 part = stream->oa_buffer.circ_size - head;
/* Zero out the entire report */
if (report_size <= part) {
- memset(report, 0, report_size);
+ xe_map_memset(stream->oa->xe, map, head, 0, report_size);
} else {
- memset(report, 0, part);
- memset(oa_buf_base, 0, report_size - part);
+ xe_map_memset(stream->oa->xe, map, head, 0, part);
+ xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
}
}
}
@@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/* Zero out the OA buffer since we rely on zero report id and timestamp fields */
- memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
+ xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
+ xe_bo_size(stream->oa_buffer.bo));
}
static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
@@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs)
static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
{
xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
+ kfree(stream->oa_buffer.bounce);
}
static void xe_oa_free_configs(struct xe_oa_stream *stream)
@@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
return PTR_ERR(bo);
stream->oa_buffer.bo = bo;
+
/* mmap implementation requires OA buffer to be in system memory */
xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
- stream->oa_buffer.vaddr = bo->vmap.vaddr;
+
+ stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
+ if (!stream->oa_buffer.bounce) {
+ xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
+ return -ENOMEM;
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index b03ffd5134834..99a04b8d586b5 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -162,8 +162,8 @@ struct xe_oa_buffer {
/** @format: xe_bo backing the OA buffer */
struct xe_bo *bo;
- /** @vaddr: mapped vaddr of the OA buffer */
- u8 *vaddr;
+ /** @bounce: bounce buffer used with xe_map layer */
+ void *bounce;
/** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
spinlock_t ptr_lock;
--
2.48.1
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-15 2:03 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
@ 2026-04-24 21:05 ` Umesh Nerlige Ramappa
2026-04-24 23:56 ` Dixit, Ashutosh
0 siblings, 1 reply; 25+ messages in thread
From: Umesh Nerlige Ramappa @ 2026-04-24 21:05 UTC (permalink / raw)
To: Ashutosh Dixit; +Cc: intel-xe
On Tue, Apr 14, 2026 at 07:03:40PM -0700, Ashutosh Dixit wrote:
>OA code should have used xe_map layer to begin with. In CRI, the OA buffer
>can be located both in system and device memory. For these reasons, move OA
>code to use the xe_map layer when accessing the OA buffer.
>
>v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
>v3: To avoid performance impact in v2, revert to v1 but move
> xe_map_copy_to_user() to xe_map.h
>v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
>
>Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>---
> drivers/gpu/drm/xe/xe_oa.c | 95 +++++++++++++++++++-------------
> drivers/gpu/drm/xe/xe_oa_types.h | 4 +-
> 2 files changed, 58 insertions(+), 41 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
>index 6337e671c97ae..4d13d8ef8103f 100644
>--- a/drivers/gpu/drm/xe/xe_oa.c
>+++ b/drivers/gpu/drm/xe/xe_oa.c
>@@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
> #define oa_report_header_64bit(__s) \
> ((__s)->oa_buffer.format->header == HDR_64_BIT)
>
>-static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
>+static u64 oa_report_id(struct xe_oa_stream *stream, u32 tail)
> {
>- return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
>+ return oa_report_header_64bit(stream) ?
>+ xe_map_rd(stream->oa->xe, map, tail, u64) :
>+ xe_map_rd(stream->oa->xe, map, tail, u32);
> }
(1) The helper is just returning the report id for a report passed. In
this case it is tail, but the code would work for any report. I would
keep the name as report or report_offset. Let the caller use specific
names like tail.
>
>-static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
>+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 head)
> {
>- if (oa_report_header_64bit(stream))
>- *(u64 *)report = 0;
>- else
>- *report = 0;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
>+ oa_report_header_64bit(stream) ?
>+ xe_map_wr(stream->oa->xe, map, head, u64, 0) :
>+ xe_map_wr(stream->oa->xe, map, head, u32, 0);
> }
>
>-static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
>+static u64 oa_timestamp(struct xe_oa_stream *stream, u32 tail)
> {
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
> return oa_report_header_64bit(stream) ?
>- *((u64 *)report + 1) :
>- *((u32 *)report + 1);
>+ xe_map_rd(stream->oa->xe, map, tail + 2, u64) :
>+ xe_map_rd(stream->oa->xe, map, tail + 1, u32);
(2) I think this should be tail + 8 and tail + 4. Can you please double
check? The u32/u64 is only being used to access the data at the offset
rather than determine how to calculate the actual offset.
> }
>
>-static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
>+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head)
> {
>- if (oa_report_header_64bit(stream))
>- *(u64 *)&report[2] = 0;
>- else
>- report[1] = 0;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
>+ oa_report_header_64bit(stream) ?
>+ xe_map_wr(stream->oa->xe, map, head + 2, u64, 0) :
>+ xe_map_wr(stream->oa->xe, map, head + 1, u32, 0);
Same here - comment (2)
> }
Same comment (1) for all the above helpers - the name can be generic -
report_offset. I see similar change in other functions below as well. If
you think it is necessary, I suggest trying to move it to a separate
patch with commit message explaining why you want to rename them. That
would also help review this patch easily.
>
> static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
>@@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
> * they were written. If not : (╯°□°)╯︵ ┻━┻
> */
> while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
>- void *report = stream->oa_buffer.vaddr + tail;
>-
>- if (oa_report_id(stream, report) || oa_timestamp(stream, report))
>+ if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
> break;
>
> tail = xe_oa_circ_diff(stream, tail, report_size);
>@@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
> return HRTIMER_RESTART;
> }
>
>+static unsigned long
>+xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 head, u32 len)
>+{
>+ xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
>+ &stream->oa_buffer.bo->vmap, head, len);
>+ return copy_to_user(dst, stream->oa_buffer.bounce, len);
>+}
>+
Still need to look at the patch below this point. Will send that out
next week...
Regards,
Umesh
> static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
>- size_t count, size_t *offset, const u8 *report)
>+ size_t count, size_t *offset, u32 head)
> {
> int report_size = stream->oa_buffer.format->size;
> int report_size_partial;
>- u8 *oa_buf_end;
>
> if ((count - *offset) < report_size)
> return -ENOSPC;
>
> buf += *offset;
>
>- oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
>- report_size_partial = oa_buf_end - report;
>+ report_size_partial = stream->oa_buffer.circ_size - head;
>
> if (report_size_partial < report_size) {
>- if (copy_to_user(buf, report, report_size_partial))
>+ if (xe_oa_copy_to_user(stream, buf, head, report_size_partial))
> return -EFAULT;
> buf += report_size_partial;
>
>- if (copy_to_user(buf, stream->oa_buffer.vaddr,
>- report_size - report_size_partial))
>+ if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
> return -EFAULT;
>- } else if (copy_to_user(buf, report, report_size)) {
>+ } else if (xe_oa_copy_to_user(stream, buf, head, report_size)) {
> return -EFAULT;
> }
>
>@@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> size_t count, size_t *offset)
> {
> int report_size = stream->oa_buffer.format->size;
>- u8 *oa_buf_base = stream->oa_buffer.vaddr;
> u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
> size_t start_offset = *offset;
> unsigned long flags;
>@@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
>
> for (; xe_oa_circ_diff(stream, tail, head);
> head = xe_oa_circ_incr(stream, head, report_size)) {
>- u8 *report = oa_buf_base + head;
>-
>- ret = xe_oa_append_report(stream, buf, count, offset, report);
>+ ret = xe_oa_append_report(stream, buf, count, offset, head);
> if (ret)
> break;
>
> if (!(stream->oa_buffer.circ_size % report_size)) {
> /* Clear out report id and timestamp to detect unlanded reports */
>- oa_report_id_clear(stream, (void *)report);
>- oa_timestamp_clear(stream, (void *)report);
>+ oa_report_id_clear(stream, head);
>+ oa_timestamp_clear(stream, head);
> } else {
>- u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
>- u32 part = oa_buf_end - report;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+ u32 part = stream->oa_buffer.circ_size - head;
>
> /* Zero out the entire report */
> if (report_size <= part) {
>- memset(report, 0, report_size);
>+ xe_map_memset(stream->oa->xe, map, head, 0, report_size);
> } else {
>- memset(report, 0, part);
>- memset(oa_buf_base, 0, report_size - part);
>+ xe_map_memset(stream->oa->xe, map, head, 0, part);
>+ xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
> }
> }
> }
>@@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
> spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> /* Zero out the OA buffer since we rely on zero report id and timestamp fields */
>- memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
>+ xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
>+ xe_bo_size(stream->oa_buffer.bo));
> }
>
> static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
>@@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs)
> static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
> {
> xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
>+ kfree(stream->oa_buffer.bounce);
> }
>
> static void xe_oa_free_configs(struct xe_oa_stream *stream)
>@@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
> return PTR_ERR(bo);
>
> stream->oa_buffer.bo = bo;
>+
> /* mmap implementation requires OA buffer to be in system memory */
> xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
>- stream->oa_buffer.vaddr = bo->vmap.vaddr;
>+
>+ stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
>+ if (!stream->oa_buffer.bounce) {
>+ xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
>+ return -ENOMEM;
>+ }
>+
> return 0;
> }
>
>diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
>index b03ffd5134834..99a04b8d586b5 100644
>--- a/drivers/gpu/drm/xe/xe_oa_types.h
>+++ b/drivers/gpu/drm/xe/xe_oa_types.h
>@@ -162,8 +162,8 @@ struct xe_oa_buffer {
> /** @format: xe_bo backing the OA buffer */
> struct xe_bo *bo;
>
>- /** @vaddr: mapped vaddr of the OA buffer */
>- u8 *vaddr;
>+ /** @bounce: bounce buffer used with xe_map layer */
>+ void *bounce;
>
> /** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
> spinlock_t ptr_lock;
>--
>2.48.1
>
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-24 21:05 ` Umesh Nerlige Ramappa
@ 2026-04-24 23:56 ` Dixit, Ashutosh
2026-04-25 0:16 ` Dixit, Ashutosh
0 siblings, 1 reply; 25+ messages in thread
From: Dixit, Ashutosh @ 2026-04-24 23:56 UTC (permalink / raw)
To: Umesh Nerlige Ramappa; +Cc: intel-xe
On Fri, 24 Apr 2026 14:05:16 -0700, Umesh Nerlige Ramappa wrote:
>
Hi Umesh,
> On Tue, Apr 14, 2026 at 07:03:40PM -0700, Ashutosh Dixit wrote:
> > OA code should have used xe_map layer to begin with. In CRI, the OA buffer
> > can be located both in system and device memory. For these reasons, move OA
> > code to use the xe_map layer when accessing the OA buffer.
> >
> > v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
> > v3: To avoid performance impact in v2, revert to v1 but move
> > xe_map_copy_to_user() to xe_map.h
> > v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
> >
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_oa.c | 95 +++++++++++++++++++-------------
> > drivers/gpu/drm/xe/xe_oa_types.h | 4 +-
> > 2 files changed, 58 insertions(+), 41 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> > index 6337e671c97ae..4d13d8ef8103f 100644
> > --- a/drivers/gpu/drm/xe/xe_oa.c
> > +++ b/drivers/gpu/drm/xe/xe_oa.c
> > @@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
> > #define oa_report_header_64bit(__s) \
> > ((__s)->oa_buffer.format->header == HDR_64_BIT)
> >
> > -static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
> > +static u64 oa_report_id(struct xe_oa_stream *stream, u32 tail)
> > {
> > - return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
> > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > +
> > + return oa_report_header_64bit(stream) ?
> > + xe_map_rd(stream->oa->xe, map, tail, u64) :
> > + xe_map_rd(stream->oa->xe, map, tail, u32);
> > }
>
> (1) The helper is just returning the report id for a report passed. In this
> case it is tail, but the code would work for any report. I would keep the
> name as report or report_offset. Let the caller use specific names like
> tail.
>
> >
> > -static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
> > +static void oa_report_id_clear(struct xe_oa_stream *stream, u32 head)
> > {
> > - if (oa_report_header_64bit(stream))
> > - *(u64 *)report = 0;
> > - else
> > - *report = 0;
> > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > +
> > + oa_report_header_64bit(stream) ?
> > + xe_map_wr(stream->oa->xe, map, head, u64, 0) :
> > + xe_map_wr(stream->oa->xe, map, head, u32, 0);
> > }
> >
> > -static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
> > +static u64 oa_timestamp(struct xe_oa_stream *stream, u32 tail)
> > {
> > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > +
> > return oa_report_header_64bit(stream) ?
> > - *((u64 *)report + 1) :
> > - *((u32 *)report + 1);
> > + xe_map_rd(stream->oa->xe, map, tail + 2, u64) :
> > + xe_map_rd(stream->oa->xe, map, tail + 1, u32);
>
> (2) I think this should be tail + 8 and tail + 4. Can you please double
> check? The u32/u64 is only being used to access the data at the offset
> rather than determine how to calculate the actual offset.
Hmm, you are right!!! I don't believe I made such a basic mistake :/ And
all the xe_oa tests work fine even with this.
Anyway thanks for catching this, I'm fixing this in the next rev.
>
> > }
> >
> > -static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
> > +static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head)
> > {
> > - if (oa_report_header_64bit(stream))
> > - *(u64 *)&report[2] = 0;
> > - else
> > - report[1] = 0;
> > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > +
> > + oa_report_header_64bit(stream) ?
> > + xe_map_wr(stream->oa->xe, map, head + 2, u64, 0) :
> > + xe_map_wr(stream->oa->xe, map, head + 1, u32, 0);
>
> Same here - comment (2)
>
> > }
>
> Same comment (1) for all the above helpers - the name can be generic -
> report_offset. I see similar change in other functions below as well. If
> you think it is necessary, I suggest trying to move it to a separate patch
> with commit message explaining why you want to rename them. That would also
> help review this patch easily.
So, as we know HW writes to tail and SW reads from head. So the helpers
with head as the arg are invoked from the read side only and the helpers
with tail as the arg are invoked from the write side only. That was the
reason for naming the arg as head/tail, rather than just "offset". I
thought it would make the code easier to understand and also maybe prevent
mistakes in the future.
So anyway, let me ask once again if you think I should change it. Of
course, there's no problem changing the arg name in the helpers.
I'll also see if it's possible to split the patch some way to make the
review a bit easier.
Thanks.
--
Ashutosh
>
> >
> > static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
> > @@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
> > * they were written. If not : (╯°□°)╯︵ ┻━┻
> > */
> > while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
> > - void *report = stream->oa_buffer.vaddr + tail;
> > -
> > - if (oa_report_id(stream, report) || oa_timestamp(stream, report))
> > + if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
> > break;
> >
> > tail = xe_oa_circ_diff(stream, tail, report_size);
> > @@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
> > return HRTIMER_RESTART;
> > }
> >
> > +static unsigned long
> > +xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 head, u32 len)
> > +{
> > + xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
> > + &stream->oa_buffer.bo->vmap, head, len);
> > + return copy_to_user(dst, stream->oa_buffer.bounce, len);
> > +}
> > +
>
> Still need to look at the patch below this point. Will send that out next
> week...
>
> Regards,
> Umesh
>
> > static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
> > - size_t count, size_t *offset, const u8 *report)
> > + size_t count, size_t *offset, u32 head)
> > {
> > int report_size = stream->oa_buffer.format->size;
> > int report_size_partial;
> > - u8 *oa_buf_end;
> >
> > if ((count - *offset) < report_size)
> > return -ENOSPC;
> >
> > buf += *offset;
> >
> > - oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
> > - report_size_partial = oa_buf_end - report;
> > + report_size_partial = stream->oa_buffer.circ_size - head;
> >
> > if (report_size_partial < report_size) {
> > - if (copy_to_user(buf, report, report_size_partial))
> > + if (xe_oa_copy_to_user(stream, buf, head, report_size_partial))
> > return -EFAULT;
> > buf += report_size_partial;
> >
> > - if (copy_to_user(buf, stream->oa_buffer.vaddr,
> > - report_size - report_size_partial))
> > + if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
> > return -EFAULT;
> > - } else if (copy_to_user(buf, report, report_size)) {
> > + } else if (xe_oa_copy_to_user(stream, buf, head, report_size)) {
> > return -EFAULT;
> > }
> >
> > @@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> > size_t count, size_t *offset)
> > {
> > int report_size = stream->oa_buffer.format->size;
> > - u8 *oa_buf_base = stream->oa_buffer.vaddr;
> > u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
> > size_t start_offset = *offset;
> > unsigned long flags;
> > @@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> >
> > for (; xe_oa_circ_diff(stream, tail, head);
> > head = xe_oa_circ_incr(stream, head, report_size)) {
> > - u8 *report = oa_buf_base + head;
> > -
> > - ret = xe_oa_append_report(stream, buf, count, offset, report);
> > + ret = xe_oa_append_report(stream, buf, count, offset, head);
> > if (ret)
> > break;
> >
> > if (!(stream->oa_buffer.circ_size % report_size)) {
> > /* Clear out report id and timestamp to detect unlanded reports */
> > - oa_report_id_clear(stream, (void *)report);
> > - oa_timestamp_clear(stream, (void *)report);
> > + oa_report_id_clear(stream, head);
> > + oa_timestamp_clear(stream, head);
> > } else {
> > - u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
> > - u32 part = oa_buf_end - report;
> > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > + u32 part = stream->oa_buffer.circ_size - head;
> >
> > /* Zero out the entire report */
> > if (report_size <= part) {
> > - memset(report, 0, report_size);
> > + xe_map_memset(stream->oa->xe, map, head, 0, report_size);
> > } else {
> > - memset(report, 0, part);
> > - memset(oa_buf_base, 0, report_size - part);
> > + xe_map_memset(stream->oa->xe, map, head, 0, part);
> > + xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
> > }
> > }
> > }
> > @@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
> > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
> >
> > /* Zero out the OA buffer since we rely on zero report id and timestamp fields */
> > - memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
> > + xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
> > + xe_bo_size(stream->oa_buffer.bo));
> > }
> >
> > static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
> > @@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs)
> > static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
> > {
> > xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
> > + kfree(stream->oa_buffer.bounce);
> > }
> >
> > static void xe_oa_free_configs(struct xe_oa_stream *stream)
> > @@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
> > return PTR_ERR(bo);
> >
> > stream->oa_buffer.bo = bo;
> > +
> > /* mmap implementation requires OA buffer to be in system memory */
> > xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
> > - stream->oa_buffer.vaddr = bo->vmap.vaddr;
> > +
> > + stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
> > + if (!stream->oa_buffer.bounce) {
> > + xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
> > + return -ENOMEM;
> > + }
> > +
> > return 0;
> > }
> >
> > diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
> > index b03ffd5134834..99a04b8d586b5 100644
> > --- a/drivers/gpu/drm/xe/xe_oa_types.h
> > +++ b/drivers/gpu/drm/xe/xe_oa_types.h
> > @@ -162,8 +162,8 @@ struct xe_oa_buffer {
> > /** @format: xe_bo backing the OA buffer */
> > struct xe_bo *bo;
> >
> > - /** @vaddr: mapped vaddr of the OA buffer */
> > - u8 *vaddr;
> > + /** @bounce: bounce buffer used with xe_map layer */
> > + void *bounce;
> >
> > /** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
> > spinlock_t ptr_lock;
> > --
> > 2.48.1
> >
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-25 0:14 [PATCH v5 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
@ 2026-04-25 0:14 ` Ashutosh Dixit
2026-04-27 19:23 ` Umesh Nerlige Ramappa
0 siblings, 1 reply; 25+ messages in thread
From: Ashutosh Dixit @ 2026-04-25 0:14 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
OA code should have used xe_map layer to begin with. In CRI, the OA buffer
can be located both in system and device memory. For these reasons, move OA
code to use the xe_map layer when accessing the OA buffer.
v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
v3: To avoid performance impact in v2, revert to v1 but move
xe_map_copy_to_user() to xe_map.h
v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
v5: Fix offsets in oa_timestamp() and oa_timestamp_clear() (Umesh)
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 95 +++++++++++++++++++-------------
drivers/gpu/drm/xe/xe_oa_types.h | 4 +-
2 files changed, 58 insertions(+), 41 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6337e671c97ae..3ae30dc609bd7 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
#define oa_report_header_64bit(__s) \
((__s)->oa_buffer.format->header == HDR_64_BIT)
-static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
+static u64 oa_report_id(struct xe_oa_stream *stream, u32 tail)
{
- return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ return oa_report_header_64bit(stream) ?
+ xe_map_rd(stream->oa->xe, map, tail, u64) :
+ xe_map_rd(stream->oa->xe, map, tail, u32);
}
-static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 head)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)report = 0;
- else
- *report = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, head, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, head, u32, 0);
}
-static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
+static u64 oa_timestamp(struct xe_oa_stream *stream, u32 tail)
{
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
return oa_report_header_64bit(stream) ?
- *((u64 *)report + 1) :
- *((u32 *)report + 1);
+ xe_map_rd(stream->oa->xe, map, tail + 8, u64) :
+ xe_map_rd(stream->oa->xe, map, tail + 4, u32);
}
-static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)&report[2] = 0;
- else
- report[1] = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, head + 8, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, head + 4, u32, 0);
}
static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
@@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
* they were written. If not : (╯°□°)╯︵ ┻━┻
*/
while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
- void *report = stream->oa_buffer.vaddr + tail;
-
- if (oa_report_id(stream, report) || oa_timestamp(stream, report))
+ if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
break;
tail = xe_oa_circ_diff(stream, tail, report_size);
@@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
return HRTIMER_RESTART;
}
+static unsigned long
+xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 head, u32 len)
+{
+ xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
+ &stream->oa_buffer.bo->vmap, head, len);
+ return copy_to_user(dst, stream->oa_buffer.bounce, len);
+}
+
static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
- size_t count, size_t *offset, const u8 *report)
+ size_t count, size_t *offset, u32 head)
{
int report_size = stream->oa_buffer.format->size;
int report_size_partial;
- u8 *oa_buf_end;
if ((count - *offset) < report_size)
return -ENOSPC;
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- report_size_partial = oa_buf_end - report;
+ report_size_partial = stream->oa_buffer.circ_size - head;
if (report_size_partial < report_size) {
- if (copy_to_user(buf, report, report_size_partial))
+ if (xe_oa_copy_to_user(stream, buf, head, report_size_partial))
return -EFAULT;
buf += report_size_partial;
- if (copy_to_user(buf, stream->oa_buffer.vaddr,
- report_size - report_size_partial))
+ if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
return -EFAULT;
- } else if (copy_to_user(buf, report, report_size)) {
+ } else if (xe_oa_copy_to_user(stream, buf, head, report_size)) {
return -EFAULT;
}
@@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
size_t count, size_t *offset)
{
int report_size = stream->oa_buffer.format->size;
- u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
size_t start_offset = *offset;
unsigned long flags;
@@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
for (; xe_oa_circ_diff(stream, tail, head);
head = xe_oa_circ_incr(stream, head, report_size)) {
- u8 *report = oa_buf_base + head;
-
- ret = xe_oa_append_report(stream, buf, count, offset, report);
+ ret = xe_oa_append_report(stream, buf, count, offset, head);
if (ret)
break;
if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
- oa_report_id_clear(stream, (void *)report);
- oa_timestamp_clear(stream, (void *)report);
+ oa_report_id_clear(stream, head);
+ oa_timestamp_clear(stream, head);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- u32 part = oa_buf_end - report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+ u32 part = stream->oa_buffer.circ_size - head;
/* Zero out the entire report */
if (report_size <= part) {
- memset(report, 0, report_size);
+ xe_map_memset(stream->oa->xe, map, head, 0, report_size);
} else {
- memset(report, 0, part);
- memset(oa_buf_base, 0, report_size - part);
+ xe_map_memset(stream->oa->xe, map, head, 0, part);
+ xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
}
}
}
@@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/* Zero out the OA buffer since we rely on zero report id and timestamp fields */
- memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
+ xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
+ xe_bo_size(stream->oa_buffer.bo));
}
static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
@@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs)
static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
{
xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
+ kfree(stream->oa_buffer.bounce);
}
static void xe_oa_free_configs(struct xe_oa_stream *stream)
@@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
return PTR_ERR(bo);
stream->oa_buffer.bo = bo;
+
/* mmap implementation requires OA buffer to be in system memory */
xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
- stream->oa_buffer.vaddr = bo->vmap.vaddr;
+
+ stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
+ if (!stream->oa_buffer.bounce) {
+ xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
+ return -ENOMEM;
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 8906c3084b5f8..3d9ec8490899c 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -164,8 +164,8 @@ struct xe_oa_buffer {
/** @bo: xe_bo backing the OA buffer */
struct xe_bo *bo;
- /** @vaddr: mapped vaddr of the OA buffer */
- u8 *vaddr;
+ /** @bounce: bounce buffer used with xe_map layer */
+ void *bounce;
/** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
spinlock_t ptr_lock;
--
2.54.0
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-24 23:56 ` Dixit, Ashutosh
@ 2026-04-25 0:16 ` Dixit, Ashutosh
2026-04-27 18:27 ` Umesh Nerlige Ramappa
0 siblings, 1 reply; 25+ messages in thread
From: Dixit, Ashutosh @ 2026-04-25 0:16 UTC (permalink / raw)
To: Umesh Nerlige Ramappa; +Cc: intel-xe
On Fri, 24 Apr 2026 16:56:04 -0700, Dixit, Ashutosh wrote:
>
> On Fri, 24 Apr 2026 14:05:16 -0700, Umesh Nerlige Ramappa wrote:
> >
>
> Hi Umesh,
>
> > On Tue, Apr 14, 2026 at 07:03:40PM -0700, Ashutosh Dixit wrote:
> > > OA code should have used xe_map layer to begin with. In CRI, the OA buffer
> > > can be located both in system and device memory. For these reasons, move OA
> > > code to use the xe_map layer when accessing the OA buffer.
> > >
> > > v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
> > > v3: To avoid performance impact in v2, revert to v1 but move
> > > xe_map_copy_to_user() to xe_map.h
> > > v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
> > >
> > > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > > ---
> > > drivers/gpu/drm/xe/xe_oa.c | 95 +++++++++++++++++++-------------
> > > drivers/gpu/drm/xe/xe_oa_types.h | 4 +-
> > > 2 files changed, 58 insertions(+), 41 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> > > index 6337e671c97ae..4d13d8ef8103f 100644
> > > --- a/drivers/gpu/drm/xe/xe_oa.c
> > > +++ b/drivers/gpu/drm/xe/xe_oa.c
> > > @@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
> > > #define oa_report_header_64bit(__s) \
> > > ((__s)->oa_buffer.format->header == HDR_64_BIT)
> > >
> > > -static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
> > > +static u64 oa_report_id(struct xe_oa_stream *stream, u32 tail)
> > > {
> > > - return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
> > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > > +
> > > + return oa_report_header_64bit(stream) ?
> > > + xe_map_rd(stream->oa->xe, map, tail, u64) :
> > > + xe_map_rd(stream->oa->xe, map, tail, u32);
> > > }
> >
> > (1) The helper is just returning the report id for a report passed. In this
> > case it is tail, but the code would work for any report. I would keep the
> > name as report or report_offset. Let the caller use specific names like
> > tail.
> >
> > >
> > > -static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
> > > +static void oa_report_id_clear(struct xe_oa_stream *stream, u32 head)
> > > {
> > > - if (oa_report_header_64bit(stream))
> > > - *(u64 *)report = 0;
> > > - else
> > > - *report = 0;
> > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > > +
> > > + oa_report_header_64bit(stream) ?
> > > + xe_map_wr(stream->oa->xe, map, head, u64, 0) :
> > > + xe_map_wr(stream->oa->xe, map, head, u32, 0);
> > > }
> > >
> > > -static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
> > > +static u64 oa_timestamp(struct xe_oa_stream *stream, u32 tail)
> > > {
> > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > > +
> > > return oa_report_header_64bit(stream) ?
> > > - *((u64 *)report + 1) :
> > > - *((u32 *)report + 1);
> > > + xe_map_rd(stream->oa->xe, map, tail + 2, u64) :
> > > + xe_map_rd(stream->oa->xe, map, tail + 1, u32);
> >
> > (2) I think this should be tail + 8 and tail + 4. Can you please double
> > check? The u32/u64 is only being used to access the data at the offset
> > rather than determine how to calculate the actual offset.
>
> Hmm, you are right!!! I don't believe I made such a basic mistake :/ And
> all the xe_oa tests work fine even with this.
>
> Anyway thanks for catching this, I'm fixing this in the next rev.
>
> >
> > > }
> > >
> > > -static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
> > > +static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head)
> > > {
> > > - if (oa_report_header_64bit(stream))
> > > - *(u64 *)&report[2] = 0;
> > > - else
> > > - report[1] = 0;
> > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > > +
> > > + oa_report_header_64bit(stream) ?
> > > + xe_map_wr(stream->oa->xe, map, head + 2, u64, 0) :
> > > + xe_map_wr(stream->oa->xe, map, head + 1, u32, 0);
> >
> > Same here - comment (2)
> >
> > > }
> >
> > Same comment (1) for all the above helpers - the name can be generic -
> > report_offset. I see similar change in other functions below as well. If
> > you think it is necessary, I suggest trying to move it to a separate patch
> > with commit message explaining why you want to rename them. That would also
> > help review this patch easily.
>
> So, as we know HW writes to tail and SW reads from head. So the helpers
> with head as the arg are invoked from the read side only and the helpers
> with tail as the arg are invoked from the write side only. That was the
> reason for naming the arg as head/tail, rather than just "offset". I
> thought it will make the code easier to understand and also maybe prevent
> mistakes in the future.
>
> So anyway, let me ask once again if you think I should change it. Of
> course, there's no problem changing the arg name in the helpers.
>
> I'll also see if it's possible to split the patch some way to make the
> review a bit easier.
I just sent out v5 in which I have let all this stay as is. Anyway, please
let me know what you prefer. Thanks.
>
> Thanks.
> --
> Ashutosh
>
>
> >
> > >
> > > static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
> > > @@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
> > > * they were written. If not : (╯°□°)╯︵ ┻━┻
> > > */
> > > while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
> > > - void *report = stream->oa_buffer.vaddr + tail;
> > > -
> > > - if (oa_report_id(stream, report) || oa_timestamp(stream, report))
> > > + if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
> > > break;
> > >
> > > tail = xe_oa_circ_diff(stream, tail, report_size);
> > > @@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
> > > return HRTIMER_RESTART;
> > > }
> > >
> > > +static unsigned long
> > > +xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 head, u32 len)
> > > +{
> > > + xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
> > > + &stream->oa_buffer.bo->vmap, head, len);
> > > + return copy_to_user(dst, stream->oa_buffer.bounce, len);
> > > +}
> > > +
> >
> > Still need to look at the patch below this point. Will send that out next
> > week...
> >
> > Regards,
> > Umesh
> >
> > > static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
> > > - size_t count, size_t *offset, const u8 *report)
> > > + size_t count, size_t *offset, u32 head)
> > > {
> > > int report_size = stream->oa_buffer.format->size;
> > > int report_size_partial;
> > > - u8 *oa_buf_end;
> > >
> > > if ((count - *offset) < report_size)
> > > return -ENOSPC;
> > >
> > > buf += *offset;
> > >
> > > - oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
> > > - report_size_partial = oa_buf_end - report;
> > > + report_size_partial = stream->oa_buffer.circ_size - head;
> > >
> > > if (report_size_partial < report_size) {
> > > - if (copy_to_user(buf, report, report_size_partial))
> > > + if (xe_oa_copy_to_user(stream, buf, head, report_size_partial))
> > > return -EFAULT;
> > > buf += report_size_partial;
> > >
> > > - if (copy_to_user(buf, stream->oa_buffer.vaddr,
> > > - report_size - report_size_partial))
> > > + if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
> > > return -EFAULT;
> > > - } else if (copy_to_user(buf, report, report_size)) {
> > > + } else if (xe_oa_copy_to_user(stream, buf, head, report_size)) {
> > > return -EFAULT;
> > > }
> > >
> > > @@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> > > size_t count, size_t *offset)
> > > {
> > > int report_size = stream->oa_buffer.format->size;
> > > - u8 *oa_buf_base = stream->oa_buffer.vaddr;
> > > u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
> > > size_t start_offset = *offset;
> > > unsigned long flags;
> > > @@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> > >
> > > for (; xe_oa_circ_diff(stream, tail, head);
> > > head = xe_oa_circ_incr(stream, head, report_size)) {
> > > - u8 *report = oa_buf_base + head;
> > > -
> > > - ret = xe_oa_append_report(stream, buf, count, offset, report);
> > > + ret = xe_oa_append_report(stream, buf, count, offset, head);
> > > if (ret)
> > > break;
> > >
> > > if (!(stream->oa_buffer.circ_size % report_size)) {
> > > /* Clear out report id and timestamp to detect unlanded reports */
> > > - oa_report_id_clear(stream, (void *)report);
> > > - oa_timestamp_clear(stream, (void *)report);
> > > + oa_report_id_clear(stream, head);
> > > + oa_timestamp_clear(stream, head);
> > > } else {
> > > - u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
> > > - u32 part = oa_buf_end - report;
> > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > > + u32 part = stream->oa_buffer.circ_size - head;
> > >
> > > /* Zero out the entire report */
> > > if (report_size <= part) {
> > > - memset(report, 0, report_size);
> > > + xe_map_memset(stream->oa->xe, map, head, 0, report_size);
> > > } else {
> > > - memset(report, 0, part);
> > > - memset(oa_buf_base, 0, report_size - part);
> > > + xe_map_memset(stream->oa->xe, map, head, 0, part);
> > > + xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
> > > }
> > > }
> > > }
> > > @@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
> > > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
> > >
> > > /* Zero out the OA buffer since we rely on zero report id and timestamp fields */
> > > - memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
> > > + xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
> > > + xe_bo_size(stream->oa_buffer.bo));
> > > }
> > >
> > > static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
> > > @@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs)
> > > static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
> > > {
> > > xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
> > > + kfree(stream->oa_buffer.bounce);
> > > }
> > >
> > > static void xe_oa_free_configs(struct xe_oa_stream *stream)
> > > @@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
> > > return PTR_ERR(bo);
> > >
> > > stream->oa_buffer.bo = bo;
> > > +
> > > /* mmap implementation requires OA buffer to be in system memory */
> > > xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
> > > - stream->oa_buffer.vaddr = bo->vmap.vaddr;
> > > +
> > > + stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
> > > + if (!stream->oa_buffer.bounce) {
> > > + xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
> > > + return -ENOMEM;
> > > + }
> > > +
> > > return 0;
> > > }
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
> > > index b03ffd5134834..99a04b8d586b5 100644
> > > --- a/drivers/gpu/drm/xe/xe_oa_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_oa_types.h
> > > @@ -162,8 +162,8 @@ struct xe_oa_buffer {
> > > /** @format: xe_bo backing the OA buffer */
> > > struct xe_bo *bo;
> > >
> > > - /** @vaddr: mapped vaddr of the OA buffer */
> > > - u8 *vaddr;
> > > + /** @bounce: bounce buffer used with xe_map layer */
> > > + void *bounce;
> > >
> > > /** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
> > > spinlock_t ptr_lock;
> > > --
> > > 2.48.1
> > >
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-25 0:16 ` Dixit, Ashutosh
@ 2026-04-27 18:27 ` Umesh Nerlige Ramappa
2026-04-27 19:04 ` Dixit, Ashutosh
0 siblings, 1 reply; 25+ messages in thread
From: Umesh Nerlige Ramappa @ 2026-04-27 18:27 UTC (permalink / raw)
To: Dixit, Ashutosh; +Cc: intel-xe
On Fri, Apr 24, 2026 at 05:16:48PM -0700, Dixit, Ashutosh wrote:
>On Fri, 24 Apr 2026 16:56:04 -0700, Dixit, Ashutosh wrote:
>>
>> On Fri, 24 Apr 2026 14:05:16 -0700, Umesh Nerlige Ramappa wrote:
>> >
>>
>> Hi Umesh,
>>
>> > On Tue, Apr 14, 2026 at 07:03:40PM -0700, Ashutosh Dixit wrote:
>> > > OA code should have used xe_map layer to begin with. In CRI, the OA buffer
>> > > can be located both in system and device memory. For these reasons, move OA
>> > > code to use the xe_map layer when accessing the OA buffer.
>> > >
>> > > v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
>> > > v3: To avoid performance impact in v2, revert to v1 but move
>> > > xe_map_copy_to_user() to xe_map.h
>> > > v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
>> > >
>> > > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>> > > ---
>> > > drivers/gpu/drm/xe/xe_oa.c | 95 +++++++++++++++++++-------------
>> > > drivers/gpu/drm/xe/xe_oa_types.h | 4 +-
>> > > 2 files changed, 58 insertions(+), 41 deletions(-)
>> > >
>> > > diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
>> > > index 6337e671c97ae..4d13d8ef8103f 100644
>> > > --- a/drivers/gpu/drm/xe/xe_oa.c
>> > > +++ b/drivers/gpu/drm/xe/xe_oa.c
>> > > @@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
>> > > #define oa_report_header_64bit(__s) \
>> > > ((__s)->oa_buffer.format->header == HDR_64_BIT)
>> > >
>> > > -static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
>> > > +static u64 oa_report_id(struct xe_oa_stream *stream, u32 tail)
>> > > {
>> > > - return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
>> > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>> > > +
>> > > + return oa_report_header_64bit(stream) ?
>> > > + xe_map_rd(stream->oa->xe, map, tail, u64) :
>> > > + xe_map_rd(stream->oa->xe, map, tail, u32);
>> > > }
>> >
>> > (1) The helper is just returning the report id for a report passed. In this
>> > case it is tail, but the code would work for any report. I would keep the
>> > name as report or report_offset. Let the caller use specific names like
>> > tail.
>> >
>> > >
>> > > -static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
>> > > +static void oa_report_id_clear(struct xe_oa_stream *stream, u32 head)
>> > > {
>> > > - if (oa_report_header_64bit(stream))
>> > > - *(u64 *)report = 0;
>> > > - else
>> > > - *report = 0;
>> > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>> > > +
>> > > + oa_report_header_64bit(stream) ?
>> > > + xe_map_wr(stream->oa->xe, map, head, u64, 0) :
>> > > + xe_map_wr(stream->oa->xe, map, head, u32, 0);
>> > > }
>> > >
>> > > -static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
>> > > +static u64 oa_timestamp(struct xe_oa_stream *stream, u32 tail)
>> > > {
>> > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>> > > +
>> > > return oa_report_header_64bit(stream) ?
>> > > - *((u64 *)report + 1) :
>> > > - *((u32 *)report + 1);
>> > > + xe_map_rd(stream->oa->xe, map, tail + 2, u64) :
>> > > + xe_map_rd(stream->oa->xe, map, tail + 1, u32);
>> >
>> > (2) I think this should be tail + 8 and tail + 4. Can you please double
>> > check? The u32/u64 is only being used to access the data at the offset
>> > rather than determine how to calculate the actual offset.
>>
>> Hmm, you are right!!! I don't believe I made such a basic mistake :/ And
>> all the xe_oa tests work fine even with this.
>>
>> Anyway thanks for catching this, I'm fixing this in the next rev.
>>
>> >
>> > > }
>> > >
>> > > -static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
>> > > +static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head)
>> > > {
>> > > - if (oa_report_header_64bit(stream))
>> > > - *(u64 *)&report[2] = 0;
>> > > - else
>> > > - report[1] = 0;
>> > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>> > > +
>> > > + oa_report_header_64bit(stream) ?
>> > > + xe_map_wr(stream->oa->xe, map, head + 2, u64, 0) :
>> > > + xe_map_wr(stream->oa->xe, map, head + 1, u32, 0);
>> >
>> > Same here - comment (2)
>> >
>> > > }
>> >
>> > Same comment (1) for all the above helpers - the name can be generic -
>> > report_offset. I see similar change in other functions below as well. If
>> > you think it is necessary, I suggest trying to move it to a separate patch
>> > with commit message explaining why you want to rename them. That would also
>> > help review this patch easily.
>>
>> So, as we know HW writes to tail and SW reads from head. So the helpers
>> with head as the arg are invoked from the read side only and the helpers
>> with tail as the arg are invoked from the write side only. That was the
>> reason for naming the arg as head/tail, rather than just "offset". I
>> thought it will make the code easier to understand and also maybe prevent
>> mistakes in the future.
To me, the report_offset (or offset) is clearer. When I read the code I
would remember this helper as returning report_id/timestamp given any
report offset as an argument. The tail/head names make me wonder if the
caller has to be accounted for as well.
Anyways, please add the above description you have to the commit message
and you can leave it as is.
Thanks,
Umesh
>>
>> So anyway, let me ask once again if you think I should change it. Of
>> course, there's no problem changing the arg name in the helpers.
>>
>> I'll also see if it's possible to split the patch some way to make the
>> review a bit easier.
>
>I just sent out v5 in which I have let all this stay as is. Anyway, please
>let me know what you prefer. Thanks.
>
>>
>> Thanks.
>> --
>> Ashutosh
>>
>>
>> >
>> > >
>> > > static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
>> > > @@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
>> > > * they were written. If not : (╯°□°)╯︵ ┻━┻
>> > > */
>> > > while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
>> > > - void *report = stream->oa_buffer.vaddr + tail;
>> > > -
>> > > - if (oa_report_id(stream, report) || oa_timestamp(stream, report))
>> > > + if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
>> > > break;
>> > >
>> > > tail = xe_oa_circ_diff(stream, tail, report_size);
>> > > @@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
>> > > return HRTIMER_RESTART;
>> > > }
>> > >
>> > > +static unsigned long
>> > > +xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 head, u32 len)
>> > > +{
>> > > + xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
>> > > + &stream->oa_buffer.bo->vmap, head, len);
>> > > + return copy_to_user(dst, stream->oa_buffer.bounce, len);
>> > > +}
>> > > +
>> >
>> > Still need to look at the patch below this point. Will send that out next
>> > week...
>> >
>> > Regards,
>> > Umesh
>> >
>> > > static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
>> > > - size_t count, size_t *offset, const u8 *report)
>> > > + size_t count, size_t *offset, u32 head)
>> > > {
>> > > int report_size = stream->oa_buffer.format->size;
>> > > int report_size_partial;
>> > > - u8 *oa_buf_end;
>> > >
>> > > if ((count - *offset) < report_size)
>> > > return -ENOSPC;
>> > >
>> > > buf += *offset;
>> > >
>> > > - oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
>> > > - report_size_partial = oa_buf_end - report;
>> > > + report_size_partial = stream->oa_buffer.circ_size - head;
>> > >
>> > > if (report_size_partial < report_size) {
>> > > - if (copy_to_user(buf, report, report_size_partial))
>> > > + if (xe_oa_copy_to_user(stream, buf, head, report_size_partial))
>> > > return -EFAULT;
>> > > buf += report_size_partial;
>> > >
>> > > - if (copy_to_user(buf, stream->oa_buffer.vaddr,
>> > > - report_size - report_size_partial))
>> > > + if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
>> > > return -EFAULT;
>> > > - } else if (copy_to_user(buf, report, report_size)) {
>> > > + } else if (xe_oa_copy_to_user(stream, buf, head, report_size)) {
>> > > return -EFAULT;
>> > > }
>> > >
>> > > @@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
>> > > size_t count, size_t *offset)
>> > > {
>> > > int report_size = stream->oa_buffer.format->size;
>> > > - u8 *oa_buf_base = stream->oa_buffer.vaddr;
>> > > u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
>> > > size_t start_offset = *offset;
>> > > unsigned long flags;
>> > > @@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
>> > >
>> > > for (; xe_oa_circ_diff(stream, tail, head);
>> > > head = xe_oa_circ_incr(stream, head, report_size)) {
>> > > - u8 *report = oa_buf_base + head;
>> > > -
>> > > - ret = xe_oa_append_report(stream, buf, count, offset, report);
>> > > + ret = xe_oa_append_report(stream, buf, count, offset, head);
>> > > if (ret)
>> > > break;
>> > >
>> > > if (!(stream->oa_buffer.circ_size % report_size)) {
>> > > /* Clear out report id and timestamp to detect unlanded reports */
>> > > - oa_report_id_clear(stream, (void *)report);
>> > > - oa_timestamp_clear(stream, (void *)report);
>> > > + oa_report_id_clear(stream, head);
>> > > + oa_timestamp_clear(stream, head);
>> > > } else {
>> > > - u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
>> > > - u32 part = oa_buf_end - report;
>> > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>> > > + u32 part = stream->oa_buffer.circ_size - head;
>> > >
>> > > /* Zero out the entire report */
>> > > if (report_size <= part) {
>> > > - memset(report, 0, report_size);
>> > > + xe_map_memset(stream->oa->xe, map, head, 0, report_size);
>> > > } else {
>> > > - memset(report, 0, part);
>> > > - memset(oa_buf_base, 0, report_size - part);
>> > > + xe_map_memset(stream->oa->xe, map, head, 0, part);
>> > > + xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
>> > > }
>> > > }
>> > > }
>> > > @@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
>> > > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>> > >
>> > > /* Zero out the OA buffer since we rely on zero report id and timestamp fields */
>> > > - memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
>> > > + xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
>> > > + xe_bo_size(stream->oa_buffer.bo));
>> > > }
>> > >
>> > > static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
>> > > @@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs)
>> > > static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
>> > > {
>> > > xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
>> > > + kfree(stream->oa_buffer.bounce);
>> > > }
>> > >
>> > > static void xe_oa_free_configs(struct xe_oa_stream *stream)
>> > > @@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
>> > > return PTR_ERR(bo);
>> > >
>> > > stream->oa_buffer.bo = bo;
>> > > +
>> > > /* mmap implementation requires OA buffer to be in system memory */
>> > > xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
>> > > - stream->oa_buffer.vaddr = bo->vmap.vaddr;
>> > > +
>> > > + stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
>> > > + if (!stream->oa_buffer.bounce) {
>> > > + xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
>> > > + return -ENOMEM;
>> > > + }
>> > > +
>> > > return 0;
>> > > }
>> > >
>> > > diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
>> > > index b03ffd5134834..99a04b8d586b5 100644
>> > > --- a/drivers/gpu/drm/xe/xe_oa_types.h
>> > > +++ b/drivers/gpu/drm/xe/xe_oa_types.h
>> > > @@ -162,8 +162,8 @@ struct xe_oa_buffer {
>> > > /** @format: xe_bo backing the OA buffer */
>> > > struct xe_bo *bo;
>> > >
>> > > - /** @vaddr: mapped vaddr of the OA buffer */
>> > > - u8 *vaddr;
>> > > + /** @bounce: bounce buffer used with xe_map layer */
>> > > + void *bounce;
>> > >
>> > > /** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
>> > > spinlock_t ptr_lock;
>> > > --
>> > > 2.48.1
>> > >
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH v6 0/3] drm/xe/oa: Wa_14026633728
@ 2026-04-27 19:02 Ashutosh Dixit
2026-04-27 19:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
` (2 more replies)
0 siblings, 3 replies; 25+ messages in thread
From: Ashutosh Dixit @ 2026-04-27 19:02 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
v6 through v2: Change to Patch 1 (see changelog in Patch 1)
Ashutosh Dixit (3):
drm/xe/oa: Use xe_map layer
drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap
drm/xe/oa: Implement Wa_14026633728
drivers/gpu/drm/xe/xe_oa.c | 123 ++++++++++++++++-------------
drivers/gpu/drm/xe/xe_oa_types.h | 4 +-
drivers/gpu/drm/xe/xe_wa_oob.rules | 1 +
3 files changed, 70 insertions(+), 58 deletions(-)
--
2.54.0
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-27 19:02 [PATCH v6 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
@ 2026-04-27 19:02 ` Ashutosh Dixit
2026-04-27 19:34 ` Umesh Nerlige Ramappa
2026-04-27 19:02 ` [PATCH 2/3] drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap Ashutosh Dixit
2026-04-27 19:02 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit
2 siblings, 1 reply; 25+ messages in thread
From: Ashutosh Dixit @ 2026-04-27 19:02 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
OA code should have used xe_map layer to begin with. In CRI, the OA buffer
can be located both in system and device memory. For these reasons, move OA
code to use the xe_map layer when accessing the OA buffer.
v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
v3: To avoid performance impact in v2, revert to v1 but move
xe_map_copy_to_user() to xe_map.h
v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
v5: Fix offsets in oa_timestamp() and oa_timestamp_clear() (Umesh)
v6: Rename head/tail in helper args to report_offset (Umesh)
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 95 +++++++++++++++++++-------------
drivers/gpu/drm/xe/xe_oa_types.h | 4 +-
2 files changed, 58 insertions(+), 41 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6337e671c97ae..1fff0e8e9e78e 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
#define oa_report_header_64bit(__s) \
((__s)->oa_buffer.format->header == HDR_64_BIT)
-static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
+static u64 oa_report_id(struct xe_oa_stream *stream, u32 report_offset)
{
- return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ return oa_report_header_64bit(stream) ?
+ xe_map_rd(stream->oa->xe, map, report_offset, u64) :
+ xe_map_rd(stream->oa->xe, map, report_offset, u32);
}
-static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 report_offset)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)report = 0;
- else
- *report = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, report_offset, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, report_offset, u32, 0);
}
-static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
+static u64 oa_timestamp(struct xe_oa_stream *stream, u32 report_offset)
{
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
return oa_report_header_64bit(stream) ?
- *((u64 *)report + 1) :
- *((u32 *)report + 1);
+ xe_map_rd(stream->oa->xe, map, report_offset + 8, u64) :
+ xe_map_rd(stream->oa->xe, map, report_offset + 4, u32);
}
-static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)&report[2] = 0;
- else
- report[1] = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, report_offset + 8, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0);
}
static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
@@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
* they were written. If not : (╯°□°)╯︵ ┻━┻
*/
while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
- void *report = stream->oa_buffer.vaddr + tail;
-
- if (oa_report_id(stream, report) || oa_timestamp(stream, report))
+ if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
break;
tail = xe_oa_circ_diff(stream, tail, report_size);
@@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
return HRTIMER_RESTART;
}
+static unsigned long
+xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 report_offset, u32 len)
+{
+ xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
+ &stream->oa_buffer.bo->vmap, report_offset, len);
+ return copy_to_user(dst, stream->oa_buffer.bounce, len);
+}
+
static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
- size_t count, size_t *offset, const u8 *report)
+ size_t count, size_t *offset, u32 report_offset)
{
int report_size = stream->oa_buffer.format->size;
int report_size_partial;
- u8 *oa_buf_end;
if ((count - *offset) < report_size)
return -ENOSPC;
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- report_size_partial = oa_buf_end - report;
+ report_size_partial = stream->oa_buffer.circ_size - report_offset;
if (report_size_partial < report_size) {
- if (copy_to_user(buf, report, report_size_partial))
+ if (xe_oa_copy_to_user(stream, buf, report_offset, report_size_partial))
return -EFAULT;
buf += report_size_partial;
- if (copy_to_user(buf, stream->oa_buffer.vaddr,
- report_size - report_size_partial))
+ if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
return -EFAULT;
- } else if (copy_to_user(buf, report, report_size)) {
+ } else if (xe_oa_copy_to_user(stream, buf, report_offset, report_size)) {
return -EFAULT;
}
@@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
size_t count, size_t *offset)
{
int report_size = stream->oa_buffer.format->size;
- u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
size_t start_offset = *offset;
unsigned long flags;
@@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
for (; xe_oa_circ_diff(stream, tail, head);
head = xe_oa_circ_incr(stream, head, report_size)) {
- u8 *report = oa_buf_base + head;
-
- ret = xe_oa_append_report(stream, buf, count, offset, report);
+ ret = xe_oa_append_report(stream, buf, count, offset, head);
if (ret)
break;
if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
- oa_report_id_clear(stream, (void *)report);
- oa_timestamp_clear(stream, (void *)report);
+ oa_report_id_clear(stream, head);
+ oa_timestamp_clear(stream, head);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- u32 part = oa_buf_end - report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+ u32 part = stream->oa_buffer.circ_size - head;
/* Zero out the entire report */
if (report_size <= part) {
- memset(report, 0, report_size);
+ xe_map_memset(stream->oa->xe, map, head, 0, report_size);
} else {
- memset(report, 0, part);
- memset(oa_buf_base, 0, report_size - part);
+ xe_map_memset(stream->oa->xe, map, head, 0, part);
+ xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
}
}
}
@@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/* Zero out the OA buffer since we rely on zero report id and timestamp fields */
- memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
+ xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
+ xe_bo_size(stream->oa_buffer.bo));
}
static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
@@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs)
static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
{
xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
+ kfree(stream->oa_buffer.bounce);
}
static void xe_oa_free_configs(struct xe_oa_stream *stream)
@@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
return PTR_ERR(bo);
stream->oa_buffer.bo = bo;
+
/* mmap implementation requires OA buffer to be in system memory */
xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
- stream->oa_buffer.vaddr = bo->vmap.vaddr;
+
+ stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
+ if (!stream->oa_buffer.bounce) {
+ xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
+ return -ENOMEM;
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 8906c3084b5f8..3d9ec8490899c 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -164,8 +164,8 @@ struct xe_oa_buffer {
/** @bo: xe_bo backing the OA buffer */
struct xe_bo *bo;
- /** @vaddr: mapped vaddr of the OA buffer */
- u8 *vaddr;
+ /** @bounce: bounce buffer used with xe_map layer */
+ void *bounce;
/** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
spinlock_t ptr_lock;
--
2.54.0
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 2/3] drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap
2026-04-27 19:02 [PATCH v6 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
2026-04-27 19:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
@ 2026-04-27 19:02 ` Ashutosh Dixit
2026-04-27 19:02 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit
2 siblings, 0 replies; 25+ messages in thread
From: Ashutosh Dixit @ 2026-04-27 19:02 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
OA buffer mmap can currently only mmap OA buffer in system memory. CRI
MERTOA buffer can be located in device memory. Switch OA buffer mmap to
using drm_gem_mmap_obj, which can handle mmap's of both system and device
memory buffers.
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 20 +++-----------------
1 file changed, 3 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 1fff0e8e9e78e..d3994ae8bc82d 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -9,6 +9,7 @@
#include <linux/poll.h>
#include <drm/drm_drv.h>
+#include <drm/drm_gem.h>
#include <drm/drm_managed.h>
#include <drm/drm_syncobj.h>
#include <uapi/drm/xe_drm.h>
@@ -900,9 +901,6 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
stream->oa_buffer.bo = bo;
- /* mmap implementation requires OA buffer to be in system memory */
- xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
-
stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
if (!stream->oa_buffer.bounce) {
xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
@@ -1690,8 +1688,6 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
{
struct xe_oa_stream *stream = file->private_data;
struct xe_bo *bo = stream->oa_buffer.bo;
- unsigned long start = vma->vm_start;
- int i, ret;
if (xe_observation_paranoid && !perfmon_capable()) {
drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n");
@@ -1699,7 +1695,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
}
/* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */
- if (vma->vm_end - vma->vm_start != xe_bo_size(stream->oa_buffer.bo)) {
+ if (vma->vm_end - vma->vm_start != xe_bo_size(bo)) {
drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n");
return -EINVAL;
}
@@ -1715,17 +1711,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY,
VM_MAYWRITE | VM_MAYEXEC);
- xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma));
- for (i = 0; i < bo->ttm.ttm->num_pages; i++) {
- ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]),
- PAGE_SIZE, vma->vm_page_prot);
- if (ret)
- break;
-
- start += PAGE_SIZE;
- }
-
- return ret;
+ return drm_gem_mmap_obj(&bo->ttm.base, xe_bo_size(bo), vma);
}
static const struct file_operations xe_oa_fops = {
--
2.54.0
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728
2026-04-27 19:02 [PATCH v6 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
2026-04-27 19:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
2026-04-27 19:02 ` [PATCH 2/3] drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap Ashutosh Dixit
@ 2026-04-27 19:02 ` Ashutosh Dixit
2 siblings, 0 replies; 25+ messages in thread
From: Ashutosh Dixit @ 2026-04-27 19:02 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
CRI Wa_14026633728 requires MERTOA buffer to be allocated in device memory,
not system memory (which is the default for OA buffers).
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 10 +++++++++-
drivers/gpu/drm/xe/xe_wa_oob.rules | 1 +
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index d3994ae8bc82d..b988b02c661c7 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -250,6 +250,11 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset)
xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0);
}
+static bool mert_wa_14026633728(struct xe_oa_stream *s)
+{
+ return s->oa_unit->type == DRM_XE_OA_UNIT_TYPE_MERT && XE_GT_WA(s->gt, 14026633728);
+}
+
static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
{
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
@@ -891,11 +896,14 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
{
+ u32 vram = mert_wa_14026633728(stream) ?
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(stream->oa->xe)) :
+ XE_BO_FLAG_SYSTEM;
struct xe_bo *bo;
bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile,
size, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false);
+ vram | XE_BO_FLAG_GGTT, false);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index f8a185103b805..a7c1bd9bcb943 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -65,3 +65,4 @@
14025883347 MEDIA_VERSION_RANGE(1301, 3503)
GRAPHICS_VERSION_RANGE(2004, 3005)
+14026633728 PLATFORM(CRESCENTISLAND)
--
2.54.0
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-27 18:27 ` Umesh Nerlige Ramappa
@ 2026-04-27 19:04 ` Dixit, Ashutosh
0 siblings, 0 replies; 25+ messages in thread
From: Dixit, Ashutosh @ 2026-04-27 19:04 UTC (permalink / raw)
To: Umesh Nerlige Ramappa; +Cc: intel-xe
On Mon, 27 Apr 2026 11:27:59 -0700, Umesh Nerlige Ramappa wrote:
>
> On Fri, Apr 24, 2026 at 05:16:48PM -0700, Dixit, Ashutosh wrote:
> > On Fri, 24 Apr 2026 16:56:04 -0700, Dixit, Ashutosh wrote:
> >>
> >> On Fri, 24 Apr 2026 14:05:16 -0700, Umesh Nerlige Ramappa wrote:
> >> >
> >>
> >> Hi Umesh,
> >>
> >> > On Tue, Apr 14, 2026 at 07:03:40PM -0700, Ashutosh Dixit wrote:
> >> > > OA code should have used xe_map layer to begin with. In CRI, the OA buffer
> >> > > can be located both in system and device memory. For these reasons, move OA
> >> > > code to use the xe_map layer when accessing the OA buffer.
> >> > >
> >> > > v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
> >> > > v3: To avoid performance impact in v2, revert to v1 but move
> >> > > xe_map_copy_to_user() to xe_map.h
> >> > > v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
> >> > >
> >> > > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> >> > > ---
> >> > > drivers/gpu/drm/xe/xe_oa.c | 95 +++++++++++++++++++-------------
> >> > > drivers/gpu/drm/xe/xe_oa_types.h | 4 +-
> >> > > 2 files changed, 58 insertions(+), 41 deletions(-)
> >> > >
> >> > > diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> >> > > index 6337e671c97ae..4d13d8ef8103f 100644
> >> > > --- a/drivers/gpu/drm/xe/xe_oa.c
> >> > > +++ b/drivers/gpu/drm/xe/xe_oa.c
> >> > > @@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
> >> > > #define oa_report_header_64bit(__s) \
> >> > > ((__s)->oa_buffer.format->header == HDR_64_BIT)
> >> > >
> >> > > -static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
> >> > > +static u64 oa_report_id(struct xe_oa_stream *stream, u32 tail)
> >> > > {
> >> > > - return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
> >> > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> >> > > +
> >> > > + return oa_report_header_64bit(stream) ?
> >> > > + xe_map_rd(stream->oa->xe, map, tail, u64) :
> >> > > + xe_map_rd(stream->oa->xe, map, tail, u32);
> >> > > }
> >> >
> >> > (1) The helper is just returning the report id for a report passed. In this
> >> > case it is tail, but the code would work for any report. I would keep the
> >> > name as report or report_offset. Let the caller use specific names like
> >> > tail.
> >> >
> >> > >
> >> > > -static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
> >> > > +static void oa_report_id_clear(struct xe_oa_stream *stream, u32 head)
> >> > > {
> >> > > - if (oa_report_header_64bit(stream))
> >> > > - *(u64 *)report = 0;
> >> > > - else
> >> > > - *report = 0;
> >> > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> >> > > +
> >> > > + oa_report_header_64bit(stream) ?
> >> > > + xe_map_wr(stream->oa->xe, map, head, u64, 0) :
> >> > > + xe_map_wr(stream->oa->xe, map, head, u32, 0);
> >> > > }
> >> > >
> >> > > -static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
> >> > > +static u64 oa_timestamp(struct xe_oa_stream *stream, u32 tail)
> >> > > {
> >> > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> >> > > +
> >> > > return oa_report_header_64bit(stream) ?
> >> > > - *((u64 *)report + 1) :
> >> > > - *((u32 *)report + 1);
> >> > > + xe_map_rd(stream->oa->xe, map, tail + 2, u64) :
> >> > > + xe_map_rd(stream->oa->xe, map, tail + 1, u32);
> >> >
> >> > (2) I think this should be tail + 8 and tail + 4. Can you please double
> >> > check? The u32/u64 is only being used to access the data at the offset
> >> > rather than determine how to calculate the actual offset.
> >>
> >> Hmm, you are right!!! I don't believe I made such a basic mistake :/ And
> >> all the xe_oa tests work fine even with this.
> >>
> >> Anyway thanks for catching this, I'm fixing this in the next rev.
> >>
> >> >
> >> > > }
> >> > >
> >> > > -static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
> >> > > +static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head)
> >> > > {
> >> > > - if (oa_report_header_64bit(stream))
> >> > > - *(u64 *)&report[2] = 0;
> >> > > - else
> >> > > - report[1] = 0;
> >> > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> >> > > +
> >> > > + oa_report_header_64bit(stream) ?
> >> > > + xe_map_wr(stream->oa->xe, map, head + 2, u64, 0) :
> >> > > + xe_map_wr(stream->oa->xe, map, head + 1, u32, 0);
> >> >
> >> > Same here - comment (2)
> >> >
> >> > > }
> >> >
> >> > Same comment (1) for all the above helpers - the name can be generic -
> >> > report_offset. I see similar change in other functions below as well. If
> >> > you think it is necessary, I suggest trying to move it to a separate patch
> >> > with commit message explaining why you want to rename them. That would also
> >> > help review this patch easily.
> >>
> >> So, as we know HW writes to tail and SW reads from head. So the helpers
> >> with head as the arg are invoked from the read side only and the helpers
> >> with tail as the arg are invoked from the write side only. That was the
> >> reason for naming the arg as head/tail, rather than just "offset". I
> >> thought it will make the code easier to understand and also maybe prevent
> >> mistakes in the future.
>
> To me, the report_offset (or offset) is clearer. When I read the code I
> would remember this helper as returning report_id/timestamp given any
report offset as an argument. The tail/head names make me wonder if the
> caller has to be accounted for as well.
OK, understood now. I changed the arg to report_offset in v6. Thanks.
> Anyways, please add the above description you have to the commit message
> and you can leave it as is.
>
> Thanks,
> Umesh
>
> >>
> >> So anyway, let me ask once again if you think I should change it. Of
> >> course, there's no problem changing the arg name in the helpers.
> >>
> >> I'll also see if it's possible to split the patch some way to make the
> >> review a bit easier.
> >
> > I just sent out v5 in which I have let all this stay as is. Anyway, please
> > let me know what you prefer. Thanks.
> >
> >>
> >> Thanks.
> >> --
> >> Ashutosh
> >>
> >>
> >> >
> >> > >
> >> > > static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
> >> > > @@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
> >> > > * they were written. If not : (╯°□°)╯︵ ┻━┻
> >> > > */
> >> > > while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
> >> > > - void *report = stream->oa_buffer.vaddr + tail;
> >> > > -
> >> > > - if (oa_report_id(stream, report) || oa_timestamp(stream, report))
> >> > > + if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
> >> > > break;
> >> > >
> >> > > tail = xe_oa_circ_diff(stream, tail, report_size);
> >> > > @@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
> >> > > return HRTIMER_RESTART;
> >> > > }
> >> > >
> >> > > +static unsigned long
> >> > > +xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 head, u32 len)
> >> > > +{
> >> > > + xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
> >> > > + &stream->oa_buffer.bo->vmap, head, len);
> >> > > + return copy_to_user(dst, stream->oa_buffer.bounce, len);
> >> > > +}
> >> > > +
> >> >
> >> > Still need to look at the patch below this point. Will send that out next
> >> > week...
> >> >
> >> > Regards,
> >> > Umesh
> >> >
> >> > > static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
> >> > > - size_t count, size_t *offset, const u8 *report)
> >> > > + size_t count, size_t *offset, u32 head)
> >> > > {
> >> > > int report_size = stream->oa_buffer.format->size;
> >> > > int report_size_partial;
> >> > > - u8 *oa_buf_end;
> >> > >
> >> > > if ((count - *offset) < report_size)
> >> > > return -ENOSPC;
> >> > >
> >> > > buf += *offset;
> >> > >
> >> > > - oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
> >> > > - report_size_partial = oa_buf_end - report;
> >> > > + report_size_partial = stream->oa_buffer.circ_size - head;
> >> > >
> >> > > if (report_size_partial < report_size) {
> >> > > - if (copy_to_user(buf, report, report_size_partial))
> >> > > + if (xe_oa_copy_to_user(stream, buf, head, report_size_partial))
> >> > > return -EFAULT;
> >> > > buf += report_size_partial;
> >> > >
> >> > > - if (copy_to_user(buf, stream->oa_buffer.vaddr,
> >> > > - report_size - report_size_partial))
> >> > > + if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
> >> > > return -EFAULT;
> >> > > - } else if (copy_to_user(buf, report, report_size)) {
> >> > > + } else if (xe_oa_copy_to_user(stream, buf, head, report_size)) {
> >> > > return -EFAULT;
> >> > > }
> >> > >
> >> > > @@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> >> > > size_t count, size_t *offset)
> >> > > {
> >> > > int report_size = stream->oa_buffer.format->size;
> >> > > - u8 *oa_buf_base = stream->oa_buffer.vaddr;
> >> > > u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
> >> > > size_t start_offset = *offset;
> >> > > unsigned long flags;
> >> > > @@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> >> > >
> >> > > for (; xe_oa_circ_diff(stream, tail, head);
> >> > > head = xe_oa_circ_incr(stream, head, report_size)) {
> >> > > - u8 *report = oa_buf_base + head;
> >> > > -
> >> > > - ret = xe_oa_append_report(stream, buf, count, offset, report);
> >> > > + ret = xe_oa_append_report(stream, buf, count, offset, head);
> >> > > if (ret)
> >> > > break;
> >> > >
> >> > > if (!(stream->oa_buffer.circ_size % report_size)) {
> >> > > /* Clear out report id and timestamp to detect unlanded reports */
> >> > > - oa_report_id_clear(stream, (void *)report);
> >> > > - oa_timestamp_clear(stream, (void *)report);
> >> > > + oa_report_id_clear(stream, head);
> >> > > + oa_timestamp_clear(stream, head);
> >> > > } else {
> >> > > - u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
> >> > > - u32 part = oa_buf_end - report;
> >> > > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> >> > > + u32 part = stream->oa_buffer.circ_size - head;
> >> > >
> >> > > /* Zero out the entire report */
> >> > > if (report_size <= part) {
> >> > > - memset(report, 0, report_size);
> >> > > + xe_map_memset(stream->oa->xe, map, head, 0, report_size);
> >> > > } else {
> >> > > - memset(report, 0, part);
> >> > > - memset(oa_buf_base, 0, report_size - part);
> >> > > + xe_map_memset(stream->oa->xe, map, head, 0, part);
> >> > > + xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
> >> > > }
> >> > > }
> >> > > }
> >> > > @@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
> >> > > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
> >> > >
> >> > > /* Zero out the OA buffer since we rely on zero report id and timestamp fields */
> >> > > - memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
> >> > > + xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
> >> > > + xe_bo_size(stream->oa_buffer.bo));
> >> > > }
> >> > >
> >> > > static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
> >> > > @@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs)
> >> > > static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
> >> > > {
> >> > > xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
> >> > > + kfree(stream->oa_buffer.bounce);
> >> > > }
> >> > >
> >> > > static void xe_oa_free_configs(struct xe_oa_stream *stream)
> >> > > @@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
> >> > > return PTR_ERR(bo);
> >> > >
> >> > > stream->oa_buffer.bo = bo;
> >> > > +
> >> > > /* mmap implementation requires OA buffer to be in system memory */
> >> > > xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
> >> > > - stream->oa_buffer.vaddr = bo->vmap.vaddr;
> >> > > +
> >> > > + stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
> >> > > + if (!stream->oa_buffer.bounce) {
> >> > > + xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
> >> > > + return -ENOMEM;
> >> > > + }
> >> > > +
> >> > > return 0;
> >> > > }
> >> > >
> >> > > diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
> >> > > index b03ffd5134834..99a04b8d586b5 100644
> >> > > --- a/drivers/gpu/drm/xe/xe_oa_types.h
> >> > > +++ b/drivers/gpu/drm/xe/xe_oa_types.h
> >> > > @@ -162,8 +162,8 @@ struct xe_oa_buffer {
> >> > > /** @format: xe_bo backing the OA buffer */
> >> > > struct xe_bo *bo;
> >> > >
> >> > > - /** @vaddr: mapped vaddr of the OA buffer */
> >> > > - u8 *vaddr;
> >> > > + /** @bounce: bounce buffer used with xe_map layer */
> >> > > + void *bounce;
> >> > >
> >> > > /** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
> >> > > spinlock_t ptr_lock;
> >> > > --
> >> > > 2.48.1
> >> > >
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-25 0:14 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
@ 2026-04-27 19:23 ` Umesh Nerlige Ramappa
0 siblings, 0 replies; 25+ messages in thread
From: Umesh Nerlige Ramappa @ 2026-04-27 19:23 UTC (permalink / raw)
To: Ashutosh Dixit; +Cc: intel-xe
On Fri, Apr 24, 2026 at 05:14:37PM -0700, Ashutosh Dixit wrote:
>OA code should have used xe_map layer to begin with. In CRI, the OA buffer
>can be located both in system and device memory. For these reasons, move OA
>code to use the xe_map layer when accessing the OA buffer.
(1) Please also add the description from the thread in v4 on why you are
renaming some stuff.
>
>v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
>v3: To avoid performance impact in v2, revert to v1 but move
> xe_map_copy_to_user() to xe_map.h
>v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
>v5: Fix offsets in oa_timestamp() and oa_timestamp_clear() (Umesh)
>
>Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>---
> drivers/gpu/drm/xe/xe_oa.c | 95 +++++++++++++++++++-------------
> drivers/gpu/drm/xe/xe_oa_types.h | 4 +-
> 2 files changed, 58 insertions(+), 41 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
>index 6337e671c97ae..3ae30dc609bd7 100644
>--- a/drivers/gpu/drm/xe/xe_oa.c
>+++ b/drivers/gpu/drm/xe/xe_oa.c
>@@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
> #define oa_report_header_64bit(__s) \
> ((__s)->oa_buffer.format->header == HDR_64_BIT)
>
>-static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
>+static u64 oa_report_id(struct xe_oa_stream *stream, u32 tail)
> {
>- return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
>+ return oa_report_header_64bit(stream) ?
>+ xe_map_rd(stream->oa->xe, map, tail, u64) :
>+ xe_map_rd(stream->oa->xe, map, tail, u32);
> }
>
>-static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
>+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 head)
> {
>- if (oa_report_header_64bit(stream))
>- *(u64 *)report = 0;
>- else
>- *report = 0;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
>+ oa_report_header_64bit(stream) ?
>+ xe_map_wr(stream->oa->xe, map, head, u64, 0) :
>+ xe_map_wr(stream->oa->xe, map, head, u32, 0);
> }
>
>-static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
>+static u64 oa_timestamp(struct xe_oa_stream *stream, u32 tail)
> {
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
> return oa_report_header_64bit(stream) ?
>- *((u64 *)report + 1) :
>- *((u32 *)report + 1);
>+ xe_map_rd(stream->oa->xe, map, tail + 8, u64) :
>+ xe_map_rd(stream->oa->xe, map, tail + 4, u32);
> }
>
>-static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
>+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 head)
> {
>- if (oa_report_header_64bit(stream))
>- *(u64 *)&report[2] = 0;
>- else
>- report[1] = 0;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
>+ oa_report_header_64bit(stream) ?
>+ xe_map_wr(stream->oa->xe, map, head + 8, u64, 0) :
>+ xe_map_wr(stream->oa->xe, map, head + 4, u32, 0);
> }
Well, since this is cleared after the report is copied out, it's
difficult to catch it. The only code this will affect is when we try to
detect fully landed reports. I don't think existing tests can catch
that.
Can we plan to implement a test for this at some point? Something like -
mmap the OA buffer, read the data using the read() call and then check the
mmapped location to ensure you zeroed out the timestamp and any other
field we are clearing.
>
> static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
>@@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
> * they were written. If not : (╯°□°)╯︵ ┻━┻
> */
> while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
>- void *report = stream->oa_buffer.vaddr + tail;
>-
>- if (oa_report_id(stream, report) || oa_timestamp(stream, report))
>+ if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
> break;
>
> tail = xe_oa_circ_diff(stream, tail, report_size);
>@@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
> return HRTIMER_RESTART;
> }
>
>+static unsigned long
>+xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 head, u32 len)
>+{
(2) Check and fail if len is greater than report_size. I see callers
right now are still within limits, so this should at least assert that
the len is <= report_size because your bounce buffer can only hold that
much data.
With 1 and 2, this is
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Thanks,
Umesh
>+ xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
>+ &stream->oa_buffer.bo->vmap, head, len);
>+ return copy_to_user(dst, stream->oa_buffer.bounce, len);
>+}
>+
> static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
>- size_t count, size_t *offset, const u8 *report)
>+ size_t count, size_t *offset, u32 head)
> {
> int report_size = stream->oa_buffer.format->size;
> int report_size_partial;
>- u8 *oa_buf_end;
>
> if ((count - *offset) < report_size)
> return -ENOSPC;
>
> buf += *offset;
>
>- oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
>- report_size_partial = oa_buf_end - report;
>+ report_size_partial = stream->oa_buffer.circ_size - head;
>
> if (report_size_partial < report_size) {
>- if (copy_to_user(buf, report, report_size_partial))
>+ if (xe_oa_copy_to_user(stream, buf, head, report_size_partial))
> return -EFAULT;
> buf += report_size_partial;
>
>- if (copy_to_user(buf, stream->oa_buffer.vaddr,
>- report_size - report_size_partial))
>+ if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
> return -EFAULT;
>- } else if (copy_to_user(buf, report, report_size)) {
>+ } else if (xe_oa_copy_to_user(stream, buf, head, report_size)) {
> return -EFAULT;
> }
>
>@@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> size_t count, size_t *offset)
> {
> int report_size = stream->oa_buffer.format->size;
>- u8 *oa_buf_base = stream->oa_buffer.vaddr;
> u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
> size_t start_offset = *offset;
> unsigned long flags;
>@@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
>
> for (; xe_oa_circ_diff(stream, tail, head);
> head = xe_oa_circ_incr(stream, head, report_size)) {
>- u8 *report = oa_buf_base + head;
>-
>- ret = xe_oa_append_report(stream, buf, count, offset, report);
>+ ret = xe_oa_append_report(stream, buf, count, offset, head);
> if (ret)
> break;
>
> if (!(stream->oa_buffer.circ_size % report_size)) {
> /* Clear out report id and timestamp to detect unlanded reports */
>- oa_report_id_clear(stream, (void *)report);
>- oa_timestamp_clear(stream, (void *)report);
>+ oa_report_id_clear(stream, head);
>+ oa_timestamp_clear(stream, head);
> } else {
>- u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
>- u32 part = oa_buf_end - report;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+ u32 part = stream->oa_buffer.circ_size - head;
>
> /* Zero out the entire report */
> if (report_size <= part) {
>- memset(report, 0, report_size);
>+ xe_map_memset(stream->oa->xe, map, head, 0, report_size);
> } else {
>- memset(report, 0, part);
>- memset(oa_buf_base, 0, report_size - part);
>+ xe_map_memset(stream->oa->xe, map, head, 0, part);
>+ xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
> }
> }
> }
>@@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
> spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> /* Zero out the OA buffer since we rely on zero report id and timestamp fields */
>- memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
>+ xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
>+ xe_bo_size(stream->oa_buffer.bo));
> }
>
> static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
>@@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs)
> static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
> {
> xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
>+ kfree(stream->oa_buffer.bounce);
> }
>
> static void xe_oa_free_configs(struct xe_oa_stream *stream)
>@@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
> return PTR_ERR(bo);
>
> stream->oa_buffer.bo = bo;
>+
> /* mmap implementation requires OA buffer to be in system memory */
> xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
>- stream->oa_buffer.vaddr = bo->vmap.vaddr;
>+
>+ stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
>+ if (!stream->oa_buffer.bounce) {
>+ xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
>+ return -ENOMEM;
>+ }
>+
> return 0;
> }
>
>diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
>index 8906c3084b5f8..3d9ec8490899c 100644
>--- a/drivers/gpu/drm/xe/xe_oa_types.h
>+++ b/drivers/gpu/drm/xe/xe_oa_types.h
>@@ -164,8 +164,8 @@ struct xe_oa_buffer {
> /** @bo: xe_bo backing the OA buffer */
> struct xe_bo *bo;
>
>- /** @vaddr: mapped vaddr of the OA buffer */
>- u8 *vaddr;
>+ /** @bounce: bounce buffer used with xe_map layer */
>+ void *bounce;
>
> /** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
> spinlock_t ptr_lock;
>--
>2.54.0
>
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-27 19:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
@ 2026-04-27 19:34 ` Umesh Nerlige Ramappa
2026-04-27 20:30 ` Dixit, Ashutosh
0 siblings, 1 reply; 25+ messages in thread
From: Umesh Nerlige Ramappa @ 2026-04-27 19:34 UTC (permalink / raw)
To: Ashutosh Dixit; +Cc: intel-xe
On Mon, Apr 27, 2026 at 12:02:21PM -0700, Ashutosh Dixit wrote:
>OA code should have used xe_map layer to begin with. In CRI, the OA buffer
>can be located both in system and device memory. For these reasons, move OA
>code to use the xe_map layer when accessing the OA buffer.
>
>v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
>v3: To avoid performance impact in v2, revert to v1 but move
> xe_map_copy_to_user() to xe_map.h
>v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
>v5: Fix offsets in oa_timestamp() and oa_timestamp_clear() (Umesh)
>v6: Rename head/tail in helper args to report_offset (Umesh)
Thanks for renaming. My comments are in the v5 version of the series.
With an assert/check in the xe_oa_copy_to_user(), this should be
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Umesh
>
>Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>---
> drivers/gpu/drm/xe/xe_oa.c | 95 +++++++++++++++++++-------------
> drivers/gpu/drm/xe/xe_oa_types.h | 4 +-
> 2 files changed, 58 insertions(+), 41 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
>index 6337e671c97ae..1fff0e8e9e78e 100644
>--- a/drivers/gpu/drm/xe/xe_oa.c
>+++ b/drivers/gpu/drm/xe/xe_oa.c
>@@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
> #define oa_report_header_64bit(__s) \
> ((__s)->oa_buffer.format->header == HDR_64_BIT)
>
>-static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
>+static u64 oa_report_id(struct xe_oa_stream *stream, u32 report_offset)
> {
>- return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
>+ return oa_report_header_64bit(stream) ?
>+ xe_map_rd(stream->oa->xe, map, report_offset, u64) :
>+ xe_map_rd(stream->oa->xe, map, report_offset, u32);
> }
>
>-static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
>+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 report_offset)
> {
>- if (oa_report_header_64bit(stream))
>- *(u64 *)report = 0;
>- else
>- *report = 0;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
>+ oa_report_header_64bit(stream) ?
>+ xe_map_wr(stream->oa->xe, map, report_offset, u64, 0) :
>+ xe_map_wr(stream->oa->xe, map, report_offset, u32, 0);
> }
>
>-static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
>+static u64 oa_timestamp(struct xe_oa_stream *stream, u32 report_offset)
> {
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
> return oa_report_header_64bit(stream) ?
>- *((u64 *)report + 1) :
>- *((u32 *)report + 1);
>+ xe_map_rd(stream->oa->xe, map, report_offset + 8, u64) :
>+ xe_map_rd(stream->oa->xe, map, report_offset + 4, u32);
> }
>
>-static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
>+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset)
> {
>- if (oa_report_header_64bit(stream))
>- *(u64 *)&report[2] = 0;
>- else
>- report[1] = 0;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
>+ oa_report_header_64bit(stream) ?
>+ xe_map_wr(stream->oa->xe, map, report_offset + 8, u64, 0) :
>+ xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0);
> }
>
> static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
>@@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
> * they were written. If not : (╯°□°)╯︵ ┻━┻
> */
> while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
>- void *report = stream->oa_buffer.vaddr + tail;
>-
>- if (oa_report_id(stream, report) || oa_timestamp(stream, report))
>+ if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
> break;
>
> tail = xe_oa_circ_diff(stream, tail, report_size);
>@@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
> return HRTIMER_RESTART;
> }
>
>+static unsigned long
>+xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 report_offset, u32 len)
>+{
>+ xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
>+ &stream->oa_buffer.bo->vmap, report_offset, len);
>+ return copy_to_user(dst, stream->oa_buffer.bounce, len);
>+}
>+
> static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
>- size_t count, size_t *offset, const u8 *report)
>+ size_t count, size_t *offset, u32 report_offset)
> {
> int report_size = stream->oa_buffer.format->size;
> int report_size_partial;
>- u8 *oa_buf_end;
>
> if ((count - *offset) < report_size)
> return -ENOSPC;
>
> buf += *offset;
>
>- oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
>- report_size_partial = oa_buf_end - report;
>+ report_size_partial = stream->oa_buffer.circ_size - report_offset;
>
> if (report_size_partial < report_size) {
>- if (copy_to_user(buf, report, report_size_partial))
>+ if (xe_oa_copy_to_user(stream, buf, report_offset, report_size_partial))
> return -EFAULT;
> buf += report_size_partial;
>
>- if (copy_to_user(buf, stream->oa_buffer.vaddr,
>- report_size - report_size_partial))
>+ if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
> return -EFAULT;
>- } else if (copy_to_user(buf, report, report_size)) {
>+ } else if (xe_oa_copy_to_user(stream, buf, report_offset, report_size)) {
> return -EFAULT;
> }
>
>@@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> size_t count, size_t *offset)
> {
> int report_size = stream->oa_buffer.format->size;
>- u8 *oa_buf_base = stream->oa_buffer.vaddr;
> u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
> size_t start_offset = *offset;
> unsigned long flags;
>@@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
>
> for (; xe_oa_circ_diff(stream, tail, head);
> head = xe_oa_circ_incr(stream, head, report_size)) {
>- u8 *report = oa_buf_base + head;
>-
>- ret = xe_oa_append_report(stream, buf, count, offset, report);
>+ ret = xe_oa_append_report(stream, buf, count, offset, head);
> if (ret)
> break;
>
> if (!(stream->oa_buffer.circ_size % report_size)) {
> /* Clear out report id and timestamp to detect unlanded reports */
>- oa_report_id_clear(stream, (void *)report);
>- oa_timestamp_clear(stream, (void *)report);
>+ oa_report_id_clear(stream, head);
>+ oa_timestamp_clear(stream, head);
> } else {
>- u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
>- u32 part = oa_buf_end - report;
>+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+ u32 part = stream->oa_buffer.circ_size - head;
>
> /* Zero out the entire report */
> if (report_size <= part) {
>- memset(report, 0, report_size);
>+ xe_map_memset(stream->oa->xe, map, head, 0, report_size);
> } else {
>- memset(report, 0, part);
>- memset(oa_buf_base, 0, report_size - part);
>+ xe_map_memset(stream->oa->xe, map, head, 0, part);
>+ xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
> }
> }
> }
>@@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
> spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> /* Zero out the OA buffer since we rely on zero report id and timestamp fields */
>- memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
>+ xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
>+ xe_bo_size(stream->oa_buffer.bo));
> }
>
> static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
>@@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs)
> static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
> {
> xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
>+ kfree(stream->oa_buffer.bounce);
> }
>
> static void xe_oa_free_configs(struct xe_oa_stream *stream)
>@@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
> return PTR_ERR(bo);
>
> stream->oa_buffer.bo = bo;
>+
> /* mmap implementation requires OA buffer to be in system memory */
> xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
>- stream->oa_buffer.vaddr = bo->vmap.vaddr;
>+
>+ stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
>+ if (!stream->oa_buffer.bounce) {
>+ xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
>+ return -ENOMEM;
>+ }
>+
> return 0;
> }
>
>diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
>index 8906c3084b5f8..3d9ec8490899c 100644
>--- a/drivers/gpu/drm/xe/xe_oa_types.h
>+++ b/drivers/gpu/drm/xe/xe_oa_types.h
>@@ -164,8 +164,8 @@ struct xe_oa_buffer {
> /** @bo: xe_bo backing the OA buffer */
> struct xe_bo *bo;
>
>- /** @vaddr: mapped vaddr of the OA buffer */
>- u8 *vaddr;
>+ /** @bounce: bounce buffer used with xe_map layer */
>+ void *bounce;
>
> /** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
> spinlock_t ptr_lock;
>--
>2.54.0
>
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-27 20:26 [PATCH v7 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
@ 2026-04-27 20:26 ` Ashutosh Dixit
0 siblings, 0 replies; 25+ messages in thread
From: Ashutosh Dixit @ 2026-04-27 20:26 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
OA code should have used xe_map layer to begin with. In CRI, the OA buffer
can be located both in system and device memory. For these reasons, move OA
code to use the xe_map layer when accessing the OA buffer.
v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
v3: To avoid performance impact in v2, revert to v1 but move
xe_map_copy_to_user() to xe_map.h
v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
v5: Fix offsets in oa_timestamp() and oa_timestamp_clear() (Umesh)
v6: Rename head/tail in helper args to report_offset (Umesh)
v7: Add xe_assert in xe_oa_copy_to_user (Umesh)
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 97 +++++++++++++++++++-------------
drivers/gpu/drm/xe/xe_oa_types.h | 4 +-
2 files changed, 60 insertions(+), 41 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6337e671c97ae..6ded3c2fce34e 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
#define oa_report_header_64bit(__s) \
((__s)->oa_buffer.format->header == HDR_64_BIT)
-static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
+static u64 oa_report_id(struct xe_oa_stream *stream, u32 report_offset)
{
- return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ return oa_report_header_64bit(stream) ?
+ xe_map_rd(stream->oa->xe, map, report_offset, u64) :
+ xe_map_rd(stream->oa->xe, map, report_offset, u32);
}
-static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 report_offset)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)report = 0;
- else
- *report = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, report_offset, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, report_offset, u32, 0);
}
-static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
+static u64 oa_timestamp(struct xe_oa_stream *stream, u32 report_offset)
{
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
return oa_report_header_64bit(stream) ?
- *((u64 *)report + 1) :
- *((u32 *)report + 1);
+ xe_map_rd(stream->oa->xe, map, report_offset + 8, u64) :
+ xe_map_rd(stream->oa->xe, map, report_offset + 4, u32);
}
-static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)&report[2] = 0;
- else
- report[1] = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, report_offset + 8, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0);
}
static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
@@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
* they were written. If not : (╯°□°)╯︵ ┻━┻
*/
while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
- void *report = stream->oa_buffer.vaddr + tail;
-
- if (oa_report_id(stream, report) || oa_timestamp(stream, report))
+ if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
break;
tail = xe_oa_circ_diff(stream, tail, report_size);
@@ -311,30 +317,37 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
return HRTIMER_RESTART;
}
+static unsigned long
+xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 report_offset, u32 len)
+{
+ xe_assert(stream->oa->xe, len <= stream->oa_buffer.format->size);
+
+ xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
+ &stream->oa_buffer.bo->vmap, report_offset, len);
+ return copy_to_user(dst, stream->oa_buffer.bounce, len);
+}
+
static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
- size_t count, size_t *offset, const u8 *report)
+ size_t count, size_t *offset, u32 report_offset)
{
int report_size = stream->oa_buffer.format->size;
int report_size_partial;
- u8 *oa_buf_end;
if ((count - *offset) < report_size)
return -ENOSPC;
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- report_size_partial = oa_buf_end - report;
+ report_size_partial = stream->oa_buffer.circ_size - report_offset;
if (report_size_partial < report_size) {
- if (copy_to_user(buf, report, report_size_partial))
+ if (xe_oa_copy_to_user(stream, buf, report_offset, report_size_partial))
return -EFAULT;
buf += report_size_partial;
- if (copy_to_user(buf, stream->oa_buffer.vaddr,
- report_size - report_size_partial))
+ if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
return -EFAULT;
- } else if (copy_to_user(buf, report, report_size)) {
+ } else if (xe_oa_copy_to_user(stream, buf, report_offset, report_size)) {
return -EFAULT;
}
@@ -347,7 +360,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
size_t count, size_t *offset)
{
int report_size = stream->oa_buffer.format->size;
- u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
size_t start_offset = *offset;
unsigned long flags;
@@ -364,26 +376,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
for (; xe_oa_circ_diff(stream, tail, head);
head = xe_oa_circ_incr(stream, head, report_size)) {
- u8 *report = oa_buf_base + head;
-
- ret = xe_oa_append_report(stream, buf, count, offset, report);
+ ret = xe_oa_append_report(stream, buf, count, offset, head);
if (ret)
break;
if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
- oa_report_id_clear(stream, (void *)report);
- oa_timestamp_clear(stream, (void *)report);
+ oa_report_id_clear(stream, head);
+ oa_timestamp_clear(stream, head);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- u32 part = oa_buf_end - report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+ u32 part = stream->oa_buffer.circ_size - head;
/* Zero out the entire report */
if (report_size <= part) {
- memset(report, 0, report_size);
+ xe_map_memset(stream->oa->xe, map, head, 0, report_size);
} else {
- memset(report, 0, part);
- memset(oa_buf_base, 0, report_size - part);
+ xe_map_memset(stream->oa->xe, map, head, 0, part);
+ xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
}
}
}
@@ -436,7 +446,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/* Zero out the OA buffer since we rely on zero report id and timestamp fields */
- memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
+ xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
+ xe_bo_size(stream->oa_buffer.bo));
}
static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
@@ -699,6 +710,7 @@ static int num_lri_dwords(int num_regs)
static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
{
xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
+ kfree(stream->oa_buffer.bounce);
}
static void xe_oa_free_configs(struct xe_oa_stream *stream)
@@ -889,9 +901,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
return PTR_ERR(bo);
stream->oa_buffer.bo = bo;
+
/* mmap implementation requires OA buffer to be in system memory */
xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
- stream->oa_buffer.vaddr = bo->vmap.vaddr;
+
+ stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
+ if (!stream->oa_buffer.bounce) {
+ xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
+ return -ENOMEM;
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 8906c3084b5f8..3d9ec8490899c 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -164,8 +164,8 @@ struct xe_oa_buffer {
/** @bo: xe_bo backing the OA buffer */
struct xe_bo *bo;
- /** @vaddr: mapped vaddr of the OA buffer */
- u8 *vaddr;
+ /** @bounce: bounce buffer used with xe_map layer */
+ void *bounce;
/** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
spinlock_t ptr_lock;
--
2.54.0
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-27 19:34 ` Umesh Nerlige Ramappa
@ 2026-04-27 20:30 ` Dixit, Ashutosh
0 siblings, 0 replies; 25+ messages in thread
From: Dixit, Ashutosh @ 2026-04-27 20:30 UTC (permalink / raw)
To: Umesh Nerlige Ramappa; +Cc: intel-xe
On Mon, 27 Apr 2026 12:34:24 -0700, Umesh Nerlige Ramappa wrote:
>
> On Mon, Apr 27, 2026 at 12:02:21PM -0700, Ashutosh Dixit wrote:
> > OA code should have used xe_map layer to begin with. In CRI, the OA buffer
> > can be located both in system and device memory. For these reasons, move OA
> > code to use the xe_map layer when accessing the OA buffer.
> >
> > v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
> > v3: To avoid performance impact in v2, revert to v1 but move
> > xe_map_copy_to_user() to xe_map.h
> > v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
> > v5: Fix offsets in oa_timestamp() and oa_timestamp_clear() (Umesh)
> > v6: Rename head/tail in helper args to report_offset (Umesh)
>
> Thanks for renaming. My comments are in the v5 version of the series.
>
> With an assert/check in the xe_oa_copy_to_user(), this should be
Added in v7. The only remaining comment is the one about the test, will make
a note of it.
>
> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Thanks.
--
Ashutosh
>
> >
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_oa.c | 95 +++++++++++++++++++-------------
> > drivers/gpu/drm/xe/xe_oa_types.h | 4 +-
> > 2 files changed, 58 insertions(+), 41 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> > index 6337e671c97ae..1fff0e8e9e78e 100644
> > --- a/drivers/gpu/drm/xe/xe_oa.c
> > +++ b/drivers/gpu/drm/xe/xe_oa.c
> > @@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
> > #define oa_report_header_64bit(__s) \
> > ((__s)->oa_buffer.format->header == HDR_64_BIT)
> >
> > -static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
> > +static u64 oa_report_id(struct xe_oa_stream *stream, u32 report_offset)
> > {
> > - return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
> > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > +
> > + return oa_report_header_64bit(stream) ?
> > + xe_map_rd(stream->oa->xe, map, report_offset, u64) :
> > + xe_map_rd(stream->oa->xe, map, report_offset, u32);
> > }
> >
> > -static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
> > +static void oa_report_id_clear(struct xe_oa_stream *stream, u32 report_offset)
> > {
> > - if (oa_report_header_64bit(stream))
> > - *(u64 *)report = 0;
> > - else
> > - *report = 0;
> > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > +
> > + oa_report_header_64bit(stream) ?
> > + xe_map_wr(stream->oa->xe, map, report_offset, u64, 0) :
> > + xe_map_wr(stream->oa->xe, map, report_offset, u32, 0);
> > }
> >
> > -static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
> > +static u64 oa_timestamp(struct xe_oa_stream *stream, u32 report_offset)
> > {
> > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > +
> > return oa_report_header_64bit(stream) ?
> > - *((u64 *)report + 1) :
> > - *((u32 *)report + 1);
> > + xe_map_rd(stream->oa->xe, map, report_offset + 8, u64) :
> > + xe_map_rd(stream->oa->xe, map, report_offset + 4, u32);
> > }
> >
> > -static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
> > +static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset)
> > {
> > - if (oa_report_header_64bit(stream))
> > - *(u64 *)&report[2] = 0;
> > - else
> > - report[1] = 0;
> > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > +
> > + oa_report_header_64bit(stream) ?
> > + xe_map_wr(stream->oa->xe, map, report_offset + 8, u64, 0) :
> > + xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0);
> > }
> >
> > static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
> > @@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
> > * they were written. If not : (╯°□°)╯︵ ┻━┻
> > */
> > while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
> > - void *report = stream->oa_buffer.vaddr + tail;
> > -
> > - if (oa_report_id(stream, report) || oa_timestamp(stream, report))
> > + if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
> > break;
> >
> > tail = xe_oa_circ_diff(stream, tail, report_size);
> > @@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
> > return HRTIMER_RESTART;
> > }
> >
> > +static unsigned long
> > +xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 report_offset, u32 len)
> > +{
> > + xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
> > + &stream->oa_buffer.bo->vmap, report_offset, len);
> > + return copy_to_user(dst, stream->oa_buffer.bounce, len);
> > +}
> > +
> > static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
> > - size_t count, size_t *offset, const u8 *report)
> > + size_t count, size_t *offset, u32 report_offset)
> > {
> > int report_size = stream->oa_buffer.format->size;
> > int report_size_partial;
> > - u8 *oa_buf_end;
> >
> > if ((count - *offset) < report_size)
> > return -ENOSPC;
> >
> > buf += *offset;
> >
> > - oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
> > - report_size_partial = oa_buf_end - report;
> > + report_size_partial = stream->oa_buffer.circ_size - report_offset;
> >
> > if (report_size_partial < report_size) {
> > - if (copy_to_user(buf, report, report_size_partial))
> > + if (xe_oa_copy_to_user(stream, buf, report_offset, report_size_partial))
> > return -EFAULT;
> > buf += report_size_partial;
> >
> > - if (copy_to_user(buf, stream->oa_buffer.vaddr,
> > - report_size - report_size_partial))
> > + if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
> > return -EFAULT;
> > - } else if (copy_to_user(buf, report, report_size)) {
> > + } else if (xe_oa_copy_to_user(stream, buf, report_offset, report_size)) {
> > return -EFAULT;
> > }
> >
> > @@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> > size_t count, size_t *offset)
> > {
> > int report_size = stream->oa_buffer.format->size;
> > - u8 *oa_buf_base = stream->oa_buffer.vaddr;
> > u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
> > size_t start_offset = *offset;
> > unsigned long flags;
> > @@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> >
> > for (; xe_oa_circ_diff(stream, tail, head);
> > head = xe_oa_circ_incr(stream, head, report_size)) {
> > - u8 *report = oa_buf_base + head;
> > -
> > - ret = xe_oa_append_report(stream, buf, count, offset, report);
> > + ret = xe_oa_append_report(stream, buf, count, offset, head);
> > if (ret)
> > break;
> >
> > if (!(stream->oa_buffer.circ_size % report_size)) {
> > /* Clear out report id and timestamp to detect unlanded reports */
> > - oa_report_id_clear(stream, (void *)report);
> > - oa_timestamp_clear(stream, (void *)report);
> > + oa_report_id_clear(stream, head);
> > + oa_timestamp_clear(stream, head);
> > } else {
> > - u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
> > - u32 part = oa_buf_end - report;
> > + struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > + u32 part = stream->oa_buffer.circ_size - head;
> >
> > /* Zero out the entire report */
> > if (report_size <= part) {
> > - memset(report, 0, report_size);
> > + xe_map_memset(stream->oa->xe, map, head, 0, report_size);
> > } else {
> > - memset(report, 0, part);
> > - memset(oa_buf_base, 0, report_size - part);
> > + xe_map_memset(stream->oa->xe, map, head, 0, part);
> > + xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
> > }
> > }
> > }
> > @@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
> > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
> >
> > /* Zero out the OA buffer since we rely on zero report id and timestamp fields */
> > - memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
> > + xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
> > + xe_bo_size(stream->oa_buffer.bo));
> > }
> >
> > static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
> > @@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs)
> > static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
> > {
> > xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
> > + kfree(stream->oa_buffer.bounce);
> > }
> >
> > static void xe_oa_free_configs(struct xe_oa_stream *stream)
> > @@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
> > return PTR_ERR(bo);
> >
> > stream->oa_buffer.bo = bo;
> > +
> > /* mmap implementation requires OA buffer to be in system memory */
> > xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
> > - stream->oa_buffer.vaddr = bo->vmap.vaddr;
> > +
> > + stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
> > + if (!stream->oa_buffer.bounce) {
> > + xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
> > + return -ENOMEM;
> > + }
> > +
> > return 0;
> > }
> >
> > diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
> > index 8906c3084b5f8..3d9ec8490899c 100644
> > --- a/drivers/gpu/drm/xe/xe_oa_types.h
> > +++ b/drivers/gpu/drm/xe/xe_oa_types.h
> > @@ -164,8 +164,8 @@ struct xe_oa_buffer {
> > /** @bo: xe_bo backing the OA buffer */
> > struct xe_bo *bo;
> >
> > - /** @vaddr: mapped vaddr of the OA buffer */
> > - u8 *vaddr;
> > + /** @bounce: bounce buffer used with xe_map layer */
> > + void *bounce;
> >
> > /** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
> > spinlock_t ptr_lock;
> > --
> > 2.54.0
> >
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 1/3] drm/xe/oa: Use xe_map layer
2026-04-27 22:11 [PATCH v8 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
@ 2026-04-27 22:11 ` Ashutosh Dixit
0 siblings, 0 replies; 25+ messages in thread
From: Ashutosh Dixit @ 2026-04-27 22:11 UTC (permalink / raw)
To: intel-xe
OA code should have used xe_map layer to begin with. In CRI, the OA buffer
can be located both in system and device memory. For these reasons, move OA
code to use the xe_map layer when accessing the OA buffer.
v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
v3: To avoid performance impact in v2, revert to v1 but move
xe_map_copy_to_user() to xe_map.h
v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
v5: Fix offsets in oa_timestamp() and oa_timestamp_clear() (Umesh)
v6: Rename head/tail in helper args to report_offset (Umesh)
v7: Add xe_assert in xe_oa_copy_to_user (Umesh)
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 97 +++++++++++++++++++-------------
drivers/gpu/drm/xe/xe_oa_types.h | 4 +-
2 files changed, 60 insertions(+), 41 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6337e671c97ae..6ded3c2fce34e 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
#define oa_report_header_64bit(__s) \
((__s)->oa_buffer.format->header == HDR_64_BIT)
-static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
+static u64 oa_report_id(struct xe_oa_stream *stream, u32 report_offset)
{
- return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ return oa_report_header_64bit(stream) ?
+ xe_map_rd(stream->oa->xe, map, report_offset, u64) :
+ xe_map_rd(stream->oa->xe, map, report_offset, u32);
}
-static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 report_offset)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)report = 0;
- else
- *report = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, report_offset, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, report_offset, u32, 0);
}
-static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
+static u64 oa_timestamp(struct xe_oa_stream *stream, u32 report_offset)
{
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
return oa_report_header_64bit(stream) ?
- *((u64 *)report + 1) :
- *((u32 *)report + 1);
+ xe_map_rd(stream->oa->xe, map, report_offset + 8, u64) :
+ xe_map_rd(stream->oa->xe, map, report_offset + 4, u32);
}
-static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset)
{
- if (oa_report_header_64bit(stream))
- *(u64 *)&report[2] = 0;
- else
- report[1] = 0;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+ oa_report_header_64bit(stream) ?
+ xe_map_wr(stream->oa->xe, map, report_offset + 8, u64, 0) :
+ xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0);
}
static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
@@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
* they were written. If not : (╯°□°)╯︵ ┻━┻
*/
while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
- void *report = stream->oa_buffer.vaddr + tail;
-
- if (oa_report_id(stream, report) || oa_timestamp(stream, report))
+ if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
break;
tail = xe_oa_circ_diff(stream, tail, report_size);
@@ -311,30 +317,37 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
return HRTIMER_RESTART;
}
+static unsigned long
+xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 report_offset, u32 len)
+{
+ xe_assert(stream->oa->xe, len <= stream->oa_buffer.format->size);
+
+ xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
+ &stream->oa_buffer.bo->vmap, report_offset, len);
+ return copy_to_user(dst, stream->oa_buffer.bounce, len);
+}
+
static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
- size_t count, size_t *offset, const u8 *report)
+ size_t count, size_t *offset, u32 report_offset)
{
int report_size = stream->oa_buffer.format->size;
int report_size_partial;
- u8 *oa_buf_end;
if ((count - *offset) < report_size)
return -ENOSPC;
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- report_size_partial = oa_buf_end - report;
+ report_size_partial = stream->oa_buffer.circ_size - report_offset;
if (report_size_partial < report_size) {
- if (copy_to_user(buf, report, report_size_partial))
+ if (xe_oa_copy_to_user(stream, buf, report_offset, report_size_partial))
return -EFAULT;
buf += report_size_partial;
- if (copy_to_user(buf, stream->oa_buffer.vaddr,
- report_size - report_size_partial))
+ if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
return -EFAULT;
- } else if (copy_to_user(buf, report, report_size)) {
+ } else if (xe_oa_copy_to_user(stream, buf, report_offset, report_size)) {
return -EFAULT;
}
@@ -347,7 +360,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
size_t count, size_t *offset)
{
int report_size = stream->oa_buffer.format->size;
- u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
size_t start_offset = *offset;
unsigned long flags;
@@ -364,26 +376,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
for (; xe_oa_circ_diff(stream, tail, head);
head = xe_oa_circ_incr(stream, head, report_size)) {
- u8 *report = oa_buf_base + head;
-
- ret = xe_oa_append_report(stream, buf, count, offset, report);
+ ret = xe_oa_append_report(stream, buf, count, offset, head);
if (ret)
break;
if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
- oa_report_id_clear(stream, (void *)report);
- oa_timestamp_clear(stream, (void *)report);
+ oa_report_id_clear(stream, head);
+ oa_timestamp_clear(stream, head);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
- u32 part = oa_buf_end - report;
+ struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+ u32 part = stream->oa_buffer.circ_size - head;
/* Zero out the entire report */
if (report_size <= part) {
- memset(report, 0, report_size);
+ xe_map_memset(stream->oa->xe, map, head, 0, report_size);
} else {
- memset(report, 0, part);
- memset(oa_buf_base, 0, report_size - part);
+ xe_map_memset(stream->oa->xe, map, head, 0, part);
+ xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
}
}
}
@@ -436,7 +446,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/* Zero out the OA buffer since we rely on zero report id and timestamp fields */
- memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
+ xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
+ xe_bo_size(stream->oa_buffer.bo));
}
static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
@@ -699,6 +710,7 @@ static int num_lri_dwords(int num_regs)
static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
{
xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
+ kfree(stream->oa_buffer.bounce);
}
static void xe_oa_free_configs(struct xe_oa_stream *stream)
@@ -889,9 +901,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
return PTR_ERR(bo);
stream->oa_buffer.bo = bo;
+
/* mmap implementation requires OA buffer to be in system memory */
xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
- stream->oa_buffer.vaddr = bo->vmap.vaddr;
+
+ stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
+ if (!stream->oa_buffer.bounce) {
+ xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
+ return -ENOMEM;
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 8906c3084b5f8..3d9ec8490899c 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -164,8 +164,8 @@ struct xe_oa_buffer {
/** @bo: xe_bo backing the OA buffer */
struct xe_bo *bo;
- /** @vaddr: mapped vaddr of the OA buffer */
- u8 *vaddr;
+ /** @bounce: bounce buffer used with xe_map layer */
+ void *bounce;
/** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
spinlock_t ptr_lock;
--
2.54.0
^ permalink raw reply related [flat|nested] 25+ messages in thread
end of thread, other threads:[~2026-04-27 22:11 UTC | newest]
Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-27 19:02 [PATCH v6 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
2026-04-27 19:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
2026-04-27 19:34 ` Umesh Nerlige Ramappa
2026-04-27 20:30 ` Dixit, Ashutosh
2026-04-27 19:02 ` [PATCH 2/3] drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap Ashutosh Dixit
2026-04-27 19:02 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit
-- strict thread matches above, loose matches on Subject: below --
2026-04-27 22:11 [PATCH v8 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
2026-04-27 22:11 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
2026-04-27 20:26 [PATCH v7 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
2026-04-27 20:26 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
2026-04-25 0:14 [PATCH v5 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
2026-04-25 0:14 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
2026-04-27 19:23 ` Umesh Nerlige Ramappa
2026-04-15 2:03 [PATCH v4 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
2026-04-15 2:03 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
2026-04-24 21:05 ` Umesh Nerlige Ramappa
2026-04-24 23:56 ` Dixit, Ashutosh
2026-04-25 0:16 ` Dixit, Ashutosh
2026-04-27 18:27 ` Umesh Nerlige Ramappa
2026-04-27 19:04 ` Dixit, Ashutosh
2026-04-11 0:48 [PATCH v3 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
2026-04-11 0:48 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
2026-04-09 23:17 [PATCH v2 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
2026-04-09 23:17 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
2026-04-11 0:49 ` Dixit, Ashutosh
2026-04-13 18:05 ` Umesh Nerlige Ramappa
2026-04-13 22:06 ` Dixit, Ashutosh
2026-04-07 3:02 [PATCH 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
2026-04-07 3:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
2026-04-07 22:49 ` Umesh Nerlige Ramappa
2026-04-08 15:32 ` Dixit, Ashutosh
2026-04-08 18:21 ` Umesh Nerlige Ramappa
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox