public inbox for intel-xe@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH v6 0/3] drm/xe/oa: Wa_14026633728
@ 2026-04-27 19:02 Ashutosh Dixit
  2026-04-27 19:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Ashutosh Dixit @ 2026-04-27 19:02 UTC (permalink / raw)
  To: intel-xe; +Cc: Umesh Nerlige Ramappa

v6 through v2: Change to Patch 1 (see changelog in Patch 1)

Ashutosh Dixit (3):
  drm/xe/oa: Use xe_map layer
  drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap
  drm/xe/oa: Implement Wa_14026633728

 drivers/gpu/drm/xe/xe_oa.c         | 123 ++++++++++++++++-------------
 drivers/gpu/drm/xe/xe_oa_types.h   |   4 +-
 drivers/gpu/drm/xe/xe_wa_oob.rules |   1 +
 3 files changed, 70 insertions(+), 58 deletions(-)

-- 
2.54.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/3] drm/xe/oa: Use xe_map layer
  2026-04-27 19:02 [PATCH v6 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
@ 2026-04-27 19:02 ` Ashutosh Dixit
  2026-04-27 19:34   ` Umesh Nerlige Ramappa
  2026-04-27 19:02 ` [PATCH 2/3] drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap Ashutosh Dixit
  2026-04-27 19:02 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit
  2 siblings, 1 reply; 6+ messages in thread
From: Ashutosh Dixit @ 2026-04-27 19:02 UTC (permalink / raw)
  To: intel-xe; +Cc: Umesh Nerlige Ramappa

OA code should have used xe_map layer to begin with. In CRI, the OA buffer
can be located both in system and device memory. For these reasons, move OA
code to use the xe_map layer when accessing the OA buffer.

v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
v3: To avoid performance impact in v2, revert to v1 but move
    xe_map_copy_to_user() to xe_map.h
v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
v5: Fix offsets in oa_timestamp() and oa_timestamp_clear() (Umesh)
v6: Rename head/tail in helper args to report_offset (Umesh)

Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
 drivers/gpu/drm/xe/xe_oa.c       | 95 +++++++++++++++++++-------------
 drivers/gpu/drm/xe/xe_oa_types.h |  4 +-
 2 files changed, 58 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6337e671c97ae..1fff0e8e9e78e 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
 #define oa_report_header_64bit(__s) \
 	((__s)->oa_buffer.format->header == HDR_64_BIT)
 
-static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
+static u64 oa_report_id(struct xe_oa_stream *stream, u32 report_offset)
 {
-	return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
+	struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+	return oa_report_header_64bit(stream) ?
+		xe_map_rd(stream->oa->xe, map, report_offset, u64) :
+		xe_map_rd(stream->oa->xe, map, report_offset, u32);
 }
 
-static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 report_offset)
 {
-	if (oa_report_header_64bit(stream))
-		*(u64 *)report = 0;
-	else
-		*report = 0;
+	struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+	oa_report_header_64bit(stream) ?
+		xe_map_wr(stream->oa->xe, map, report_offset, u64, 0) :
+		xe_map_wr(stream->oa->xe, map, report_offset, u32, 0);
 }
 
-static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
+static u64 oa_timestamp(struct xe_oa_stream *stream, u32 report_offset)
 {
+	struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
 	return oa_report_header_64bit(stream) ?
-		*((u64 *)report + 1) :
-		*((u32 *)report + 1);
+		xe_map_rd(stream->oa->xe, map, report_offset + 8, u64) :
+		xe_map_rd(stream->oa->xe, map, report_offset + 4, u32);
 }
 
-static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset)
 {
-	if (oa_report_header_64bit(stream))
-		*(u64 *)&report[2] = 0;
-	else
-		report[1] = 0;
+	struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+
+	oa_report_header_64bit(stream) ?
+		xe_map_wr(stream->oa->xe, map, report_offset + 8, u64, 0) :
+		xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0);
 }
 
 static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
@@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
 	 * they were written.  If not : (╯°□°)╯︵ ┻━┻
 	 */
 	while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
-		void *report = stream->oa_buffer.vaddr + tail;
-
-		if (oa_report_id(stream, report) || oa_timestamp(stream, report))
+		if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
 			break;
 
 		tail = xe_oa_circ_diff(stream, tail, report_size);
@@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
 	return HRTIMER_RESTART;
 }
 
+static unsigned long
+xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 report_offset, u32 len)
+{
+	xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
+			   &stream->oa_buffer.bo->vmap, report_offset, len);
+	return copy_to_user(dst, stream->oa_buffer.bounce, len);
+}
+
 static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
-			       size_t count, size_t *offset, const u8 *report)
+			       size_t count, size_t *offset, u32 report_offset)
 {
 	int report_size = stream->oa_buffer.format->size;
 	int report_size_partial;
-	u8 *oa_buf_end;
 
 	if ((count - *offset) < report_size)
 		return -ENOSPC;
 
 	buf += *offset;
 
-	oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
-	report_size_partial = oa_buf_end - report;
+	report_size_partial = stream->oa_buffer.circ_size - report_offset;
 
 	if (report_size_partial < report_size) {
-		if (copy_to_user(buf, report, report_size_partial))
+		if (xe_oa_copy_to_user(stream, buf, report_offset, report_size_partial))
 			return -EFAULT;
 		buf += report_size_partial;
 
-		if (copy_to_user(buf, stream->oa_buffer.vaddr,
-				 report_size - report_size_partial))
+		if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
 			return -EFAULT;
-	} else if (copy_to_user(buf, report, report_size)) {
+	} else if (xe_oa_copy_to_user(stream, buf, report_offset, report_size)) {
 		return -EFAULT;
 	}
 
@@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
 				size_t count, size_t *offset)
 {
 	int report_size = stream->oa_buffer.format->size;
-	u8 *oa_buf_base = stream->oa_buffer.vaddr;
 	u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
 	size_t start_offset = *offset;
 	unsigned long flags;
@@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
 
 	for (; xe_oa_circ_diff(stream, tail, head);
 	     head = xe_oa_circ_incr(stream, head, report_size)) {
-		u8 *report = oa_buf_base + head;
-
-		ret = xe_oa_append_report(stream, buf, count, offset, report);
+		ret = xe_oa_append_report(stream, buf, count, offset, head);
 		if (ret)
 			break;
 
 		if (!(stream->oa_buffer.circ_size % report_size)) {
 			/* Clear out report id and timestamp to detect unlanded reports */
-			oa_report_id_clear(stream, (void *)report);
-			oa_timestamp_clear(stream, (void *)report);
+			oa_report_id_clear(stream, head);
+			oa_timestamp_clear(stream, head);
 		} else {
-			u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
-			u32 part = oa_buf_end - report;
+			struct iosys_map *map = &stream->oa_buffer.bo->vmap;
+			u32 part = stream->oa_buffer.circ_size - head;
 
 			/* Zero out the entire report */
 			if (report_size <= part) {
-				memset(report, 0, report_size);
+				xe_map_memset(stream->oa->xe, map, head, 0, report_size);
 			} else {
-				memset(report, 0, part);
-				memset(oa_buf_base, 0, report_size - part);
+				xe_map_memset(stream->oa->xe, map, head, 0, part);
+				xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
 			}
 		}
 	}
@@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
 	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
 
 	/* Zero out the OA buffer since we rely on zero report id and timestamp fields */
-	memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
+	xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
+		      xe_bo_size(stream->oa_buffer.bo));
 }
 
 static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
@@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs)
 static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
 {
 	xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
+	kfree(stream->oa_buffer.bounce);
 }
 
 static void xe_oa_free_configs(struct xe_oa_stream *stream)
@@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
 		return PTR_ERR(bo);
 
 	stream->oa_buffer.bo = bo;
+
 	/* mmap implementation requires OA buffer to be in system memory */
 	xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
-	stream->oa_buffer.vaddr = bo->vmap.vaddr;
+
+	stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
+	if (!stream->oa_buffer.bounce) {
+		xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 8906c3084b5f8..3d9ec8490899c 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -164,8 +164,8 @@ struct xe_oa_buffer {
 	/** @bo: xe_bo backing the OA buffer */
 	struct xe_bo *bo;
 
-	/** @vaddr: mapped vaddr of the OA buffer */
-	u8 *vaddr;
+	/** @bounce: bounce buffer used with xe_map layer */
+	void *bounce;
 
 	/** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
 	spinlock_t ptr_lock;
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/3] drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap
  2026-04-27 19:02 [PATCH v6 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
  2026-04-27 19:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
@ 2026-04-27 19:02 ` Ashutosh Dixit
  2026-04-27 19:02 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit
  2 siblings, 0 replies; 6+ messages in thread
From: Ashutosh Dixit @ 2026-04-27 19:02 UTC (permalink / raw)
  To: intel-xe; +Cc: Umesh Nerlige Ramappa

OA buffer mmap can currently only mmap OA buffer in system memory. CRI
MERTOA buffer can be located in device memory. Switch OA buffer mmap to
using drm_gem_mmap_obj, which can handle mmap's of both system and device
memory buffers.

Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/xe/xe_oa.c | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 1fff0e8e9e78e..d3994ae8bc82d 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -9,6 +9,7 @@
 #include <linux/poll.h>
 
 #include <drm/drm_drv.h>
+#include <drm/drm_gem.h>
 #include <drm/drm_managed.h>
 #include <drm/drm_syncobj.h>
 #include <uapi/drm/xe_drm.h>
@@ -900,9 +901,6 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
 
 	stream->oa_buffer.bo = bo;
 
-	/* mmap implementation requires OA buffer to be in system memory */
-	xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
-
 	stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
 	if (!stream->oa_buffer.bounce) {
 		xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
@@ -1690,8 +1688,6 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct xe_oa_stream *stream = file->private_data;
 	struct xe_bo *bo = stream->oa_buffer.bo;
-	unsigned long start = vma->vm_start;
-	int i, ret;
 
 	if (xe_observation_paranoid && !perfmon_capable()) {
 		drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n");
@@ -1699,7 +1695,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
 	}
 
 	/* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */
-	if (vma->vm_end - vma->vm_start != xe_bo_size(stream->oa_buffer.bo)) {
+	if (vma->vm_end - vma->vm_start != xe_bo_size(bo)) {
 		drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n");
 		return -EINVAL;
 	}
@@ -1715,17 +1711,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
 	vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY,
 		     VM_MAYWRITE | VM_MAYEXEC);
 
-	xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma));
-	for (i = 0; i < bo->ttm.ttm->num_pages; i++) {
-		ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]),
-				      PAGE_SIZE, vma->vm_page_prot);
-		if (ret)
-			break;
-
-		start += PAGE_SIZE;
-	}
-
-	return ret;
+	return drm_gem_mmap_obj(&bo->ttm.base, xe_bo_size(bo), vma);
 }
 
 static const struct file_operations xe_oa_fops = {
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728
  2026-04-27 19:02 [PATCH v6 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
  2026-04-27 19:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
  2026-04-27 19:02 ` [PATCH 2/3] drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap Ashutosh Dixit
@ 2026-04-27 19:02 ` Ashutosh Dixit
  2 siblings, 0 replies; 6+ messages in thread
From: Ashutosh Dixit @ 2026-04-27 19:02 UTC (permalink / raw)
  To: intel-xe; +Cc: Umesh Nerlige Ramappa

CRI Wa_14026633728 requires MERTOA buffer to be allocated in device memory,
not system memory (which is the default for OA buffers).

Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/xe/xe_oa.c         | 10 +++++++++-
 drivers/gpu/drm/xe/xe_wa_oob.rules |  1 +
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index d3994ae8bc82d..b988b02c661c7 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -250,6 +250,11 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset)
 		xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0);
 }
 
+static bool mert_wa_14026633728(struct xe_oa_stream *s)
+{
+	return s->oa_unit->type == DRM_XE_OA_UNIT_TYPE_MERT && XE_GT_WA(s->gt, 14026633728);
+}
+
 static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
 {
 	u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
@@ -891,11 +896,14 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
 
 static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
 {
+	u32 vram = mert_wa_14026633728(stream) ?
+		XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(stream->oa->xe)) :
+		XE_BO_FLAG_SYSTEM;
 	struct xe_bo *bo;
 
 	bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile,
 				       size, ttm_bo_type_kernel,
-				       XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false);
+				       vram | XE_BO_FLAG_GGTT, false);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index f8a185103b805..a7c1bd9bcb943 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -65,3 +65,4 @@
 
 14025883347	MEDIA_VERSION_RANGE(1301, 3503)
 		GRAPHICS_VERSION_RANGE(2004, 3005)
+14026633728	PLATFORM(CRESCENTISLAND)
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
  2026-04-27 19:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
@ 2026-04-27 19:34   ` Umesh Nerlige Ramappa
  2026-04-27 20:30     ` Dixit, Ashutosh
  0 siblings, 1 reply; 6+ messages in thread
From: Umesh Nerlige Ramappa @ 2026-04-27 19:34 UTC (permalink / raw)
  To: Ashutosh Dixit; +Cc: intel-xe

On Mon, Apr 27, 2026 at 12:02:21PM -0700, Ashutosh Dixit wrote:
>OA code should have used xe_map layer to begin with. In CRI, the OA buffer
>can be located both in system and device memory. For these reasons, move OA
>code to use the xe_map layer when accessing the OA buffer.
>
>v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
>v3: To avoid performance impact in v2, revert to v1 but move
>    xe_map_copy_to_user() to xe_map.h
>v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
>v5: Fix offsets in oa_timestamp() and oa_timestamp_clear() (Umesh)
>v6: Rename head/tail in helper args to report_offset (Umesh)

Thanks for renaming. My comments are in the v5 version of the series.

With an assert/check in the xe_oa_copy_to_user(), this should be

Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>

Umesh

>
>Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>---
> drivers/gpu/drm/xe/xe_oa.c       | 95 +++++++++++++++++++-------------
> drivers/gpu/drm/xe/xe_oa_types.h |  4 +-
> 2 files changed, 58 insertions(+), 41 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
>index 6337e671c97ae..1fff0e8e9e78e 100644
>--- a/drivers/gpu/drm/xe/xe_oa.c
>+++ b/drivers/gpu/drm/xe/xe_oa.c
>@@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
> #define oa_report_header_64bit(__s) \
> 	((__s)->oa_buffer.format->header == HDR_64_BIT)
>
>-static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
>+static u64 oa_report_id(struct xe_oa_stream *stream, u32 report_offset)
> {
>-	return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
>+	struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
>+	return oa_report_header_64bit(stream) ?
>+		xe_map_rd(stream->oa->xe, map, report_offset, u64) :
>+		xe_map_rd(stream->oa->xe, map, report_offset, u32);
> }
>
>-static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
>+static void oa_report_id_clear(struct xe_oa_stream *stream, u32 report_offset)
> {
>-	if (oa_report_header_64bit(stream))
>-		*(u64 *)report = 0;
>-	else
>-		*report = 0;
>+	struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
>+	oa_report_header_64bit(stream) ?
>+		xe_map_wr(stream->oa->xe, map, report_offset, u64, 0) :
>+		xe_map_wr(stream->oa->xe, map, report_offset, u32, 0);
> }
>
>-static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
>+static u64 oa_timestamp(struct xe_oa_stream *stream, u32 report_offset)
> {
>+	struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
> 	return oa_report_header_64bit(stream) ?
>-		*((u64 *)report + 1) :
>-		*((u32 *)report + 1);
>+		xe_map_rd(stream->oa->xe, map, report_offset + 8, u64) :
>+		xe_map_rd(stream->oa->xe, map, report_offset + 4, u32);
> }
>
>-static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
>+static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset)
> {
>-	if (oa_report_header_64bit(stream))
>-		*(u64 *)&report[2] = 0;
>-	else
>-		report[1] = 0;
>+	struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+
>+	oa_report_header_64bit(stream) ?
>+		xe_map_wr(stream->oa->xe, map, report_offset + 8, u64, 0) :
>+		xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0);
> }
>
> static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
>@@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
> 	 * they were written.  If not : (╯°□°)╯︵ ┻━┻
> 	 */
> 	while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
>-		void *report = stream->oa_buffer.vaddr + tail;
>-
>-		if (oa_report_id(stream, report) || oa_timestamp(stream, report))
>+		if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
> 			break;
>
> 		tail = xe_oa_circ_diff(stream, tail, report_size);
>@@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
> 	return HRTIMER_RESTART;
> }
>
>+static unsigned long
>+xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 report_offset, u32 len)
>+{
>+	xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
>+			   &stream->oa_buffer.bo->vmap, report_offset, len);
>+	return copy_to_user(dst, stream->oa_buffer.bounce, len);
>+}
>+
> static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
>-			       size_t count, size_t *offset, const u8 *report)
>+			       size_t count, size_t *offset, u32 report_offset)
> {
> 	int report_size = stream->oa_buffer.format->size;
> 	int report_size_partial;
>-	u8 *oa_buf_end;
>
> 	if ((count - *offset) < report_size)
> 		return -ENOSPC;
>
> 	buf += *offset;
>
>-	oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
>-	report_size_partial = oa_buf_end - report;
>+	report_size_partial = stream->oa_buffer.circ_size - report_offset;
>
> 	if (report_size_partial < report_size) {
>-		if (copy_to_user(buf, report, report_size_partial))
>+		if (xe_oa_copy_to_user(stream, buf, report_offset, report_size_partial))
> 			return -EFAULT;
> 		buf += report_size_partial;
>
>-		if (copy_to_user(buf, stream->oa_buffer.vaddr,
>-				 report_size - report_size_partial))
>+		if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
> 			return -EFAULT;
>-	} else if (copy_to_user(buf, report, report_size)) {
>+	} else if (xe_oa_copy_to_user(stream, buf, report_offset, report_size)) {
> 		return -EFAULT;
> 	}
>
>@@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> 				size_t count, size_t *offset)
> {
> 	int report_size = stream->oa_buffer.format->size;
>-	u8 *oa_buf_base = stream->oa_buffer.vaddr;
> 	u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
> 	size_t start_offset = *offset;
> 	unsigned long flags;
>@@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
>
> 	for (; xe_oa_circ_diff(stream, tail, head);
> 	     head = xe_oa_circ_incr(stream, head, report_size)) {
>-		u8 *report = oa_buf_base + head;
>-
>-		ret = xe_oa_append_report(stream, buf, count, offset, report);
>+		ret = xe_oa_append_report(stream, buf, count, offset, head);
> 		if (ret)
> 			break;
>
> 		if (!(stream->oa_buffer.circ_size % report_size)) {
> 			/* Clear out report id and timestamp to detect unlanded reports */
>-			oa_report_id_clear(stream, (void *)report);
>-			oa_timestamp_clear(stream, (void *)report);
>+			oa_report_id_clear(stream, head);
>+			oa_timestamp_clear(stream, head);
> 		} else {
>-			u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
>-			u32 part = oa_buf_end - report;
>+			struct iosys_map *map = &stream->oa_buffer.bo->vmap;
>+			u32 part = stream->oa_buffer.circ_size - head;
>
> 			/* Zero out the entire report */
> 			if (report_size <= part) {
>-				memset(report, 0, report_size);
>+				xe_map_memset(stream->oa->xe, map, head, 0, report_size);
> 			} else {
>-				memset(report, 0, part);
>-				memset(oa_buf_base, 0, report_size - part);
>+				xe_map_memset(stream->oa->xe, map, head, 0, part);
>+				xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
> 			}
> 		}
> 	}
>@@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
> 	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> 	/* Zero out the OA buffer since we rely on zero report id and timestamp fields */
>-	memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
>+	xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
>+		      xe_bo_size(stream->oa_buffer.bo));
> }
>
> static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
>@@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs)
> static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
> {
> 	xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
>+	kfree(stream->oa_buffer.bounce);
> }
>
> static void xe_oa_free_configs(struct xe_oa_stream *stream)
>@@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
> 		return PTR_ERR(bo);
>
> 	stream->oa_buffer.bo = bo;
>+
> 	/* mmap implementation requires OA buffer to be in system memory */
> 	xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
>-	stream->oa_buffer.vaddr = bo->vmap.vaddr;
>+
>+	stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
>+	if (!stream->oa_buffer.bounce) {
>+		xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
>+		return -ENOMEM;
>+	}
>+
> 	return 0;
> }
>
>diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
>index 8906c3084b5f8..3d9ec8490899c 100644
>--- a/drivers/gpu/drm/xe/xe_oa_types.h
>+++ b/drivers/gpu/drm/xe/xe_oa_types.h
>@@ -164,8 +164,8 @@ struct xe_oa_buffer {
> 	/** @bo: xe_bo backing the OA buffer */
> 	struct xe_bo *bo;
>
>-	/** @vaddr: mapped vaddr of the OA buffer */
>-	u8 *vaddr;
>+	/** @bounce: bounce buffer used with xe_map layer */
>+	void *bounce;
>
> 	/** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
> 	spinlock_t ptr_lock;
>-- 
>2.54.0
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] drm/xe/oa: Use xe_map layer
  2026-04-27 19:34   ` Umesh Nerlige Ramappa
@ 2026-04-27 20:30     ` Dixit, Ashutosh
  0 siblings, 0 replies; 6+ messages in thread
From: Dixit, Ashutosh @ 2026-04-27 20:30 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: intel-xe

On Mon, 27 Apr 2026 12:34:24 -0700, Umesh Nerlige Ramappa wrote:
>
> On Mon, Apr 27, 2026 at 12:02:21PM -0700, Ashutosh Dixit wrote:
> > OA code should have used xe_map layer to begin with. In CRI, the OA buffer
> > can be located both in system and device memory. For these reasons, move OA
> > code to use the xe_map layer when accessing the OA buffer.
> >
> > v2: Use xe_map layer and put_user in xe_oa_copy_to_user (Umesh)
> > v3: To avoid performance impact in v2, revert to v1 but move
> >    xe_map_copy_to_user() to xe_map.h
> > v4: Use bounce buffer and copy_to_user in xe_oa_copy_to_user
> > v5: Fix offsets in oa_timestamp() and oa_timestamp_clear() (Umesh)
> > v6: Rename head/tail in helper args to report_offset (Umesh)
>
> Thanks for renaming. My comments are in the v5 version of the series.
>
> With an assert/check in the xe_oa_copy_to_user(), this should be

Added in v7. The only remaining comment is the one about the test, will make
a note of it.

>
> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>

Thanks.
--
Ashutosh


>
> >
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_oa.c       | 95 +++++++++++++++++++-------------
> > drivers/gpu/drm/xe/xe_oa_types.h |  4 +-
> > 2 files changed, 58 insertions(+), 41 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> > index 6337e671c97ae..1fff0e8e9e78e 100644
> > --- a/drivers/gpu/drm/xe/xe_oa.c
> > +++ b/drivers/gpu/drm/xe/xe_oa.c
> > @@ -213,32 +213,40 @@ static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
> > #define oa_report_header_64bit(__s) \
> >	((__s)->oa_buffer.format->header == HDR_64_BIT)
> >
> > -static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
> > +static u64 oa_report_id(struct xe_oa_stream *stream, u32 report_offset)
> > {
> > -	return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
> > +	struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > +
> > +	return oa_report_header_64bit(stream) ?
> > +		xe_map_rd(stream->oa->xe, map, report_offset, u64) :
> > +		xe_map_rd(stream->oa->xe, map, report_offset, u32);
> > }
> >
> > -static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
> > +static void oa_report_id_clear(struct xe_oa_stream *stream, u32 report_offset)
> > {
> > -	if (oa_report_header_64bit(stream))
> > -		*(u64 *)report = 0;
> > -	else
> > -		*report = 0;
> > +	struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > +
> > +	oa_report_header_64bit(stream) ?
> > +		xe_map_wr(stream->oa->xe, map, report_offset, u64, 0) :
> > +		xe_map_wr(stream->oa->xe, map, report_offset, u32, 0);
> > }
> >
> > -static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
> > +static u64 oa_timestamp(struct xe_oa_stream *stream, u32 report_offset)
> > {
> > +	struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > +
> >	return oa_report_header_64bit(stream) ?
> > -		*((u64 *)report + 1) :
> > -		*((u32 *)report + 1);
> > +		xe_map_rd(stream->oa->xe, map, report_offset + 8, u64) :
> > +		xe_map_rd(stream->oa->xe, map, report_offset + 4, u32);
> > }
> >
> > -static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
> > +static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 report_offset)
> > {
> > -	if (oa_report_header_64bit(stream))
> > -		*(u64 *)&report[2] = 0;
> > -	else
> > -		report[1] = 0;
> > +	struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > +
> > +	oa_report_header_64bit(stream) ?
> > +		xe_map_wr(stream->oa->xe, map, report_offset + 8, u64, 0) :
> > +		xe_map_wr(stream->oa->xe, map, report_offset + 4, u32, 0);
> > }
> >
> > static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
> > @@ -275,9 +283,7 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
> >	 * they were written.  If not : (╯°□°)╯︵ ┻━┻
> >	 */
> >	while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
> > -		void *report = stream->oa_buffer.vaddr + tail;
> > -
> > -		if (oa_report_id(stream, report) || oa_timestamp(stream, report))
> > +		if (oa_report_id(stream, tail) || oa_timestamp(stream, tail))
> >			break;
> >
> >		tail = xe_oa_circ_diff(stream, tail, report_size);
> > @@ -311,30 +317,35 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
> >	return HRTIMER_RESTART;
> > }
> >
> > +static unsigned long
> > +xe_oa_copy_to_user(struct xe_oa_stream *stream, void __user *dst, u32 report_offset, u32 len)
> > +{
> > +	xe_map_memcpy_from(stream->oa->xe, stream->oa_buffer.bounce,
> > +			   &stream->oa_buffer.bo->vmap, report_offset, len);
> > +	return copy_to_user(dst, stream->oa_buffer.bounce, len);
> > +}
> > +
> > static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
> > -			       size_t count, size_t *offset, const u8 *report)
> > +			       size_t count, size_t *offset, u32 report_offset)
> > {
> >	int report_size = stream->oa_buffer.format->size;
> >	int report_size_partial;
> > -	u8 *oa_buf_end;
> >
> >	if ((count - *offset) < report_size)
> >		return -ENOSPC;
> >
> >	buf += *offset;
> >
> > -	oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
> > -	report_size_partial = oa_buf_end - report;
> > +	report_size_partial = stream->oa_buffer.circ_size - report_offset;
> >
> >	if (report_size_partial < report_size) {
> > -		if (copy_to_user(buf, report, report_size_partial))
> > +		if (xe_oa_copy_to_user(stream, buf, report_offset, report_size_partial))
> >			return -EFAULT;
> >		buf += report_size_partial;
> >
> > -		if (copy_to_user(buf, stream->oa_buffer.vaddr,
> > -				 report_size - report_size_partial))
> > +		if (xe_oa_copy_to_user(stream, buf, 0, report_size - report_size_partial))
> >			return -EFAULT;
> > -	} else if (copy_to_user(buf, report, report_size)) {
> > +	} else if (xe_oa_copy_to_user(stream, buf, report_offset, report_size)) {
> >		return -EFAULT;
> >	}
> >
> > @@ -347,7 +358,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> >				size_t count, size_t *offset)
> > {
> >	int report_size = stream->oa_buffer.format->size;
> > -	u8 *oa_buf_base = stream->oa_buffer.vaddr;
> >	u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
> >	size_t start_offset = *offset;
> >	unsigned long flags;
> > @@ -364,26 +374,24 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> >
> >	for (; xe_oa_circ_diff(stream, tail, head);
> >	     head = xe_oa_circ_incr(stream, head, report_size)) {
> > -		u8 *report = oa_buf_base + head;
> > -
> > -		ret = xe_oa_append_report(stream, buf, count, offset, report);
> > +		ret = xe_oa_append_report(stream, buf, count, offset, head);
> >		if (ret)
> >			break;
> >
> >		if (!(stream->oa_buffer.circ_size % report_size)) {
> >			/* Clear out report id and timestamp to detect unlanded reports */
> > -			oa_report_id_clear(stream, (void *)report);
> > -			oa_timestamp_clear(stream, (void *)report);
> > +			oa_report_id_clear(stream, head);
> > +			oa_timestamp_clear(stream, head);
> >		} else {
> > -			u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
> > -			u32 part = oa_buf_end - report;
> > +			struct iosys_map *map = &stream->oa_buffer.bo->vmap;
> > +			u32 part = stream->oa_buffer.circ_size - head;
> >
> >			/* Zero out the entire report */
> >			if (report_size <= part) {
> > -				memset(report, 0, report_size);
> > +				xe_map_memset(stream->oa->xe, map, head, 0, report_size);
> >			} else {
> > -				memset(report, 0, part);
> > -				memset(oa_buf_base, 0, report_size - part);
> > +				xe_map_memset(stream->oa->xe, map, head, 0, part);
> > +				xe_map_memset(stream->oa->xe, map, 0, 0, report_size - part);
> >			}
> >		}
> >	}
> > @@ -436,7 +444,8 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
> >	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
> >
> >	/* Zero out the OA buffer since we rely on zero report id and timestamp fields */
> > -	memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
> > +	xe_map_memset(stream->oa->xe, &stream->oa_buffer.bo->vmap, 0, 0,
> > +		      xe_bo_size(stream->oa_buffer.bo));
> > }
> >
> > static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
> > @@ -699,6 +708,7 @@ static int num_lri_dwords(int num_regs)
> > static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
> > {
> >	xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
> > +	kfree(stream->oa_buffer.bounce);
> > }
> >
> > static void xe_oa_free_configs(struct xe_oa_stream *stream)
> > @@ -889,9 +899,16 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
> >		return PTR_ERR(bo);
> >
> >	stream->oa_buffer.bo = bo;
> > +
> >	/* mmap implementation requires OA buffer to be in system memory */
> >	xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
> > -	stream->oa_buffer.vaddr = bo->vmap.vaddr;
> > +
> > +	stream->oa_buffer.bounce = kmalloc(stream->oa_buffer.format->size, GFP_KERNEL);
> > +	if (!stream->oa_buffer.bounce) {
> > +		xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
> > +		return -ENOMEM;
> > +	}
> > +
> >	return 0;
> > }
> >
> > diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
> > index 8906c3084b5f8..3d9ec8490899c 100644
> > --- a/drivers/gpu/drm/xe/xe_oa_types.h
> > +++ b/drivers/gpu/drm/xe/xe_oa_types.h
> > @@ -164,8 +164,8 @@ struct xe_oa_buffer {
> >	/** @bo: xe_bo backing the OA buffer */
> >	struct xe_bo *bo;
> >
> > -	/** @vaddr: mapped vaddr of the OA buffer */
> > -	u8 *vaddr;
> > +	/** @bounce: bounce buffer used with xe_map layer */
> > +	void *bounce;
> >
> >	/** @ptr_lock: Lock protecting reads/writes to head/tail pointers */
> >	spinlock_t ptr_lock;
> > --
> > 2.54.0
> >

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2026-04-27 20:30 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-27 19:02 [PATCH v6 0/3] drm/xe/oa: Wa_14026633728 Ashutosh Dixit
2026-04-27 19:02 ` [PATCH 1/3] drm/xe/oa: Use xe_map layer Ashutosh Dixit
2026-04-27 19:34   ` Umesh Nerlige Ramappa
2026-04-27 20:30     ` Dixit, Ashutosh
2026-04-27 19:02 ` [PATCH 2/3] drm/xe/oa: Use drm_gem_mmap_obj for OA buffer mmap Ashutosh Dixit
2026-04-27 19:02 ` [PATCH 3/3] drm/xe/oa: Implement Wa_14026633728 Ashutosh Dixit

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox