* [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode
@ 2024-03-14 0:47 Ashutosh Dixit
0 siblings, 0 replies; 15+ messages in thread
From: Ashutosh Dixit @ 2024-03-14 0:47 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
no partial reports at the end of the buffer, making the OA buffer
effectively a non-power-of-2 size circular buffer whose size, circ_size, is
a multiple of the report size.
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 35 ++++++++++++++++++++++++--------
drivers/gpu/drm/xe/xe_oa_types.h | 3 +++
2 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6f5bbb0787d9..1ad17cc14532 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -106,7 +106,14 @@ static const struct xe_oa_format oa_formats[] = {
static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
{
- return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
+ return tail >= head ? tail - head :
+ tail + stream->oa_buffer.circ_size - head;
+}
+
+static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
+{
+ return ptr + n >= stream->oa_buffer.circ_size ?
+ ptr + n - stream->oa_buffer.circ_size : ptr + n;
}
static void xe_oa_config_release(struct kref *ref)
@@ -280,7 +287,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
report_size_partial = oa_buf_end - report;
if (report_size_partial < report_size) {
@@ -306,7 +313,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
- u32 mask = (XE_OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
u32 head, tail;
@@ -317,21 +323,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
tail = stream->oa_buffer.tail;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
- xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE);
+ xe_assert(stream->oa->xe,
+ head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
- for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) {
+ for (; xe_oa_circ_diff(stream, tail, head);
+ head = xe_oa_circ_incr(stream, head, report_size)) {
u8 *report = oa_buf_base + head;
ret = xe_oa_append_report(stream, buf, count, offset, report);
if (ret)
break;
- if (is_power_of_2(report_size)) {
+ if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
oa_report_id_clear(stream, (void *)report);
oa_timestamp_clear(stream, (void *)report);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
u32 part = oa_buf_end - report;
/* Zero out the entire report */
@@ -369,7 +377,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0;
-
/*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit".
@@ -1256,6 +1263,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->periodic = param->period_exponent > 0;
stream->period_exponent = param->period_exponent;
+ /*
+ * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
+ * of buffer, making the OA buffer effectively a non-power-of-2 size circular
+ * buffer whose size, circ_size, is a multiple of the report size
+ */
+ if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
+ stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
+ stream->oa_buffer.circ_size =
+ XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
+ else
+ stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
+
if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
/* If we don't find the context offset, just return error */
ret = xe_oa_set_ctx_ctrl_offset(stream);
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 6984e7d04be5..d8d5c9d8c22e 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -163,6 +163,9 @@ struct xe_oa_buffer {
/** @tail: The last verified cached tail where HW has completed writing */
u32 tail;
+
+ /** @circ_size: The effective circular buffer size, for Xe2+ */
+ u32 circ_size;
};
/**
--
2.41.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH v19 00/17] Add OA functionality to Xe
@ 2024-06-18 1:45 Ashutosh Dixit
2024-06-18 1:46 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
0 siblings, 1 reply; 15+ messages in thread
From: Ashutosh Dixit @ 2024-06-18 1:45 UTC (permalink / raw)
To: intel-xe
Please see cover letter for v7 here:
https://patchwork.freedesktop.org/series/121084/#rev7
For changes in later versions see changelog below.
This series is also available at:
https://gitlab.freedesktop.org/adixit/kernel/-/tree/xe-oa
The series has been tested against this IGT series:
https://gitlab.freedesktop.org/adixit/igt-gpu-tools/-/tree/xe-oa
or,
https://patchwork.freedesktop.org/series/130033/
Opensource consumers using uapi in this series:
gpuvis:
https://github.com/mikesart/gpuvis/pull/86 (merged)
mesa:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29312
Test-with: 20240607200847.1964629-1-ashutosh.dixit@intel.com
v2: Fix build
v3: Rebase, due to s/xe_engine/xe_exec_queue/
v4: Re-run for testing
v5: Address review comments, new patches 11 through 17
v6: New patches 18 through 21
v7: Patches are completely redone and don't start with i915 version of the uapi
v8: See https://patchwork.freedesktop.org/patch/575214/?series=128993&rev=1
v9: See https://patchwork.freedesktop.org/patch/577441/?series=128993&rev=2
v10: See https://patchwork.freedesktop.org/patch/577943/?series=128993&rev=3
v11: See https://patchwork.freedesktop.org/patch/581239/?series=130705&rev=1
v12: Add last two new patches to enable Xe2+ overrun mode
v13: Update last two patches after code review completion
v14: https://patchwork.freedesktop.org/patch/595447/?series=134028&rev=1
v15: https://patchwork.freedesktop.org/patch/595549/?series=134056&rev=1
v16: https://patchwork.freedesktop.org/patch/597768/?series=134634&rev=1
v17: https://patchwork.freedesktop.org/patch/598236/?series=134742&rev=1
v18: Rebase
v19: Add Rodrigo's A-b on the patches
Ashutosh Dixit (17):
drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream
types
drm/xe/perf/uapi: Add perf_stream_paranoid sysctl
drm/xe/oa/uapi: Add OA data formats
drm/xe/oa/uapi: Initialize OA units
drm/xe/oa/uapi: Add/remove OA config perf ops
drm/xe/oa/uapi: Define and parse OA stream properties
drm/xe/oa: OA stream initialization (OAG)
drm/xe/oa/uapi: Expose OA stream fd
drm/xe/oa/uapi: Read file_operation
drm/xe/oa: Add OAR support
drm/xe/oa: Add OAC support
drm/xe/oa/uapi: Query OA unit properties
drm/xe/oa/uapi: OA buffer mmap
drm/xe/oa: Add MMIO trigger support
drm/xe/oa: Override GuC RC with OA on PVC
drm/xe/oa: Changes to OA_TAKEN
drm/xe/oa: Enable Xe2+ overrun mode
drivers/gpu/drm/xe/Makefile | 2 +
.../gpu/drm/xe/instructions/xe_mi_commands.h | 1 +
drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 +
drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +
drivers/gpu/drm/xe/regs/xe_oa_regs.h | 100 +
drivers/gpu/drm/xe/xe_device.c | 17 +-
drivers/gpu/drm/xe/xe_device_types.h | 4 +
drivers/gpu/drm/xe/xe_gt_types.h | 4 +
drivers/gpu/drm/xe/xe_guc_pc.c | 57 +
drivers/gpu/drm/xe/xe_guc_pc.h | 3 +
drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 +
drivers/gpu/drm/xe/xe_lrc.c | 11 +-
drivers/gpu/drm/xe/xe_lrc.h | 1 +
drivers/gpu/drm/xe/xe_module.c | 5 +
drivers/gpu/drm/xe/xe_oa.c | 2428 +++++++++++++++++
drivers/gpu/drm/xe/xe_oa.h | 27 +
drivers/gpu/drm/xe/xe_oa_types.h | 239 ++
drivers/gpu/drm/xe/xe_perf.c | 92 +
drivers/gpu/drm/xe/xe_perf.h | 20 +
drivers/gpu/drm/xe/xe_query.c | 77 +
drivers/gpu/drm/xe/xe_reg_whitelist.c | 24 +-
include/uapi/drm/xe_drm.h | 305 +++
22 files changed, 3417 insertions(+), 7 deletions(-)
create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h
create mode 100644 drivers/gpu/drm/xe/xe_oa.c
create mode 100644 drivers/gpu/drm/xe/xe_oa.h
create mode 100644 drivers/gpu/drm/xe/xe_oa_types.h
create mode 100644 drivers/gpu/drm/xe/xe_perf.c
create mode 100644 drivers/gpu/drm/xe/xe_perf.h
--
2.41.0
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode
2024-06-18 1:45 [PATCH v19 00/17] Add OA functionality to Xe Ashutosh Dixit
@ 2024-06-18 1:46 ` Ashutosh Dixit
0 siblings, 0 replies; 15+ messages in thread
From: Ashutosh Dixit @ 2024-06-18 1:46 UTC (permalink / raw)
To: intel-xe
Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
no partial reports at the end of the buffer, making the OA buffer
effectively a non-power-of-2 size circular buffer whose size, circ_size, is
a multiple of the report size.
v2: Fix implementation of xe_oa_circ_diff/xe_oa_circ_incr (Umesh)
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 35 ++++++++++++++++++++++++--------
drivers/gpu/drm/xe/xe_oa_types.h | 3 +++
2 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 2d398b7231c1..34206e0b6a08 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -114,7 +114,14 @@ static const struct xe_oa_format oa_formats[] = {
static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
{
- return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
+ return tail >= head ? tail - head :
+ tail + stream->oa_buffer.circ_size - head;
+}
+
+static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
+{
+ return ptr + n >= stream->oa_buffer.circ_size ?
+ ptr + n - stream->oa_buffer.circ_size : ptr + n;
}
static void xe_oa_config_release(struct kref *ref)
@@ -288,7 +295,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
report_size_partial = oa_buf_end - report;
if (report_size_partial < report_size) {
@@ -314,7 +321,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
- u32 mask = (XE_OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
u32 head, tail;
@@ -325,21 +331,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
tail = stream->oa_buffer.tail;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
- xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE);
+ xe_assert(stream->oa->xe,
+ head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
- for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) {
+ for (; xe_oa_circ_diff(stream, tail, head);
+ head = xe_oa_circ_incr(stream, head, report_size)) {
u8 *report = oa_buf_base + head;
ret = xe_oa_append_report(stream, buf, count, offset, report);
if (ret)
break;
- if (is_power_of_2(report_size)) {
+ if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
oa_report_id_clear(stream, (void *)report);
oa_timestamp_clear(stream, (void *)report);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
u32 part = oa_buf_end - report;
/* Zero out the entire report */
@@ -377,7 +385,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0;
-
/*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit".
@@ -1300,6 +1307,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->periodic = param->period_exponent > 0;
stream->period_exponent = param->period_exponent;
+ /*
+ * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
+ * of buffer, making the OA buffer effectively a non-power-of-2 size circular
+ * buffer whose size, circ_size, is a multiple of the report size
+ */
+ if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
+ stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
+ stream->oa_buffer.circ_size =
+ XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
+ else
+ stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
+
if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
/* If we don't find the context offset, just return error */
ret = xe_oa_set_ctx_ctrl_offset(stream);
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 0981f0e57676..706d45577dae 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -170,6 +170,9 @@ struct xe_oa_buffer {
/** @tail: The last verified cached tail where HW has completed writing */
u32 tail;
+
+ /** @circ_size: The effective circular buffer size, for Xe2+ */
+ u32 circ_size;
};
/**
--
2.41.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH v18 00/17] Add OA functionality to Xe
@ 2024-06-17 22:36 Ashutosh Dixit
2024-06-17 22:36 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
0 siblings, 1 reply; 15+ messages in thread
From: Ashutosh Dixit @ 2024-06-17 22:36 UTC (permalink / raw)
To: intel-xe
Please see cover letter for v7 here:
https://patchwork.freedesktop.org/series/121084/#rev7
For changes in later versions see changelog below.
This series is also available at:
https://gitlab.freedesktop.org/adixit/kernel/-/tree/xe-oa
The series has been tested against this IGT series:
https://gitlab.freedesktop.org/adixit/igt-gpu-tools/-/tree/xe-oa
or,
https://patchwork.freedesktop.org/series/130033/
Opensource consumers using uapi in this series:
gpuvis:
https://github.com/mikesart/gpuvis/pull/86 (merged)
mesa:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29312
Test-with: 20240607200847.1964629-1-ashutosh.dixit@intel.com
v2: Fix build
v3: Rebase, due to s/xe_engine/xe_exec_queue/
v4: Re-run for testing
v5: Address review comments, new patches 11 through 17
v6: New patches 18 through 21
v7: Patches are completely redone and don't start with i915 version of the uapi
v8: See https://patchwork.freedesktop.org/patch/575214/?series=128993&rev=1
v9: See https://patchwork.freedesktop.org/patch/577441/?series=128993&rev=2
v10: See https://patchwork.freedesktop.org/patch/577943/?series=128993&rev=3
v11: See https://patchwork.freedesktop.org/patch/581239/?series=130705&rev=1
v12: Add last two new patches to enable Xe2+ overrun mode
v13: Update last two patches after code review completion
v14: https://patchwork.freedesktop.org/patch/595447/?series=134028&rev=1
v15: https://patchwork.freedesktop.org/patch/595549/?series=134056&rev=1
v16: https://patchwork.freedesktop.org/patch/597768/?series=134634&rev=1
v17: https://patchwork.freedesktop.org/patch/598236/?series=134742&rev=1
v18: Rebase
Ashutosh Dixit (17):
drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream
types
drm/xe/perf/uapi: Add perf_stream_paranoid sysctl
drm/xe/oa/uapi: Add OA data formats
drm/xe/oa/uapi: Initialize OA units
drm/xe/oa/uapi: Add/remove OA config perf ops
drm/xe/oa/uapi: Define and parse OA stream properties
drm/xe/oa: OA stream initialization (OAG)
drm/xe/oa/uapi: Expose OA stream fd
drm/xe/oa/uapi: Read file_operation
drm/xe/oa: Add OAR support
drm/xe/oa: Add OAC support
drm/xe/oa/uapi: Query OA unit properties
drm/xe/oa/uapi: OA buffer mmap
drm/xe/oa: Add MMIO trigger support
drm/xe/oa: Override GuC RC with OA on PVC
drm/xe/oa: Changes to OA_TAKEN
drm/xe/oa: Enable Xe2+ overrun mode
drivers/gpu/drm/xe/Makefile | 2 +
.../gpu/drm/xe/instructions/xe_mi_commands.h | 1 +
drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 +
drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +
drivers/gpu/drm/xe/regs/xe_oa_regs.h | 100 +
drivers/gpu/drm/xe/xe_device.c | 17 +-
drivers/gpu/drm/xe/xe_device_types.h | 4 +
drivers/gpu/drm/xe/xe_gt_types.h | 4 +
drivers/gpu/drm/xe/xe_guc_pc.c | 57 +
drivers/gpu/drm/xe/xe_guc_pc.h | 3 +
drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 +
drivers/gpu/drm/xe/xe_lrc.c | 11 +-
drivers/gpu/drm/xe/xe_lrc.h | 1 +
drivers/gpu/drm/xe/xe_module.c | 5 +
drivers/gpu/drm/xe/xe_oa.c | 2428 +++++++++++++++++
drivers/gpu/drm/xe/xe_oa.h | 27 +
drivers/gpu/drm/xe/xe_oa_types.h | 239 ++
drivers/gpu/drm/xe/xe_perf.c | 92 +
drivers/gpu/drm/xe/xe_perf.h | 20 +
drivers/gpu/drm/xe/xe_query.c | 77 +
drivers/gpu/drm/xe/xe_reg_whitelist.c | 24 +-
include/uapi/drm/xe_drm.h | 305 +++
22 files changed, 3417 insertions(+), 7 deletions(-)
create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h
create mode 100644 drivers/gpu/drm/xe/xe_oa.c
create mode 100644 drivers/gpu/drm/xe/xe_oa.h
create mode 100644 drivers/gpu/drm/xe/xe_oa_types.h
create mode 100644 drivers/gpu/drm/xe/xe_perf.c
create mode 100644 drivers/gpu/drm/xe/xe_perf.h
--
2.41.0
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode
2024-06-17 22:36 [PATCH v18 00/17] Add OA functionality to Xe Ashutosh Dixit
@ 2024-06-17 22:36 ` Ashutosh Dixit
0 siblings, 0 replies; 15+ messages in thread
From: Ashutosh Dixit @ 2024-06-17 22:36 UTC (permalink / raw)
To: intel-xe
Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
no partial reports at the end of the buffer, making the OA buffer
effectively a non-power-of-2 size circular buffer whose size, circ_size, is
a multiple of the report size.
v2: Fix implementation of xe_oa_circ_diff/xe_oa_circ_incr (Umesh)
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 35 ++++++++++++++++++++++++--------
drivers/gpu/drm/xe/xe_oa_types.h | 3 +++
2 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 2d398b7231c1..34206e0b6a08 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -114,7 +114,14 @@ static const struct xe_oa_format oa_formats[] = {
static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
{
- return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
+ return tail >= head ? tail - head :
+ tail + stream->oa_buffer.circ_size - head;
+}
+
+static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
+{
+ return ptr + n >= stream->oa_buffer.circ_size ?
+ ptr + n - stream->oa_buffer.circ_size : ptr + n;
}
static void xe_oa_config_release(struct kref *ref)
@@ -288,7 +295,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
report_size_partial = oa_buf_end - report;
if (report_size_partial < report_size) {
@@ -314,7 +321,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
- u32 mask = (XE_OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
u32 head, tail;
@@ -325,21 +331,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
tail = stream->oa_buffer.tail;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
- xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE);
+ xe_assert(stream->oa->xe,
+ head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
- for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) {
+ for (; xe_oa_circ_diff(stream, tail, head);
+ head = xe_oa_circ_incr(stream, head, report_size)) {
u8 *report = oa_buf_base + head;
ret = xe_oa_append_report(stream, buf, count, offset, report);
if (ret)
break;
- if (is_power_of_2(report_size)) {
+ if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
oa_report_id_clear(stream, (void *)report);
oa_timestamp_clear(stream, (void *)report);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
u32 part = oa_buf_end - report;
/* Zero out the entire report */
@@ -377,7 +385,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0;
-
/*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit".
@@ -1300,6 +1307,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->periodic = param->period_exponent > 0;
stream->period_exponent = param->period_exponent;
+ /*
+ * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
+ * of buffer, making the OA buffer effectively a non-power-of-2 size circular
+ * buffer whose size, circ_size, is a multiple of the report size
+ */
+ if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
+ stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
+ stream->oa_buffer.circ_size =
+ XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
+ else
+ stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
+
if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
/* If we don't find the context offset, just return error */
ret = xe_oa_set_ctx_ctrl_offset(stream);
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 0981f0e57676..706d45577dae 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -170,6 +170,9 @@ struct xe_oa_buffer {
/** @tail: The last verified cached tail where HW has completed writing */
u32 tail;
+
+ /** @circ_size: The effective circular buffer size, for Xe2+ */
+ u32 circ_size;
};
/**
--
2.41.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH v17 00/17] Add OA functionality to Xe
@ 2024-06-12 2:05 Ashutosh Dixit
2024-06-12 2:05 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
0 siblings, 1 reply; 15+ messages in thread
From: Ashutosh Dixit @ 2024-06-12 2:05 UTC (permalink / raw)
To: intel-xe
Please see cover letter for v7 here:
https://patchwork.freedesktop.org/series/121084/#rev7
For changes in later versions see changelog below.
This series is also available at:
https://gitlab.freedesktop.org/adixit/kernel/-/tree/xe-oa
The series has been tested against this IGT series:
https://gitlab.freedesktop.org/adixit/igt-gpu-tools/-/tree/xe-oa
or,
https://patchwork.freedesktop.org/series/130033/
Opensource consumers using uapi in this series:
gpuvis:
https://github.com/mikesart/gpuvis/pull/86 (merged)
mesa:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29312
Test-with: 20240607200847.1964629-1-ashutosh.dixit@intel.com
v2: Fix build
v3: Rebase, due to s/xe_engine/xe_exec_queue/
v4: Re-run for testing
v5: Address review comments, new patches 11 through 17
v6: New patches 18 through 21
v7: Patches are completely redone and don't start with i915 version of the uapi
v8: See https://patchwork.freedesktop.org/patch/575214/?series=128993&rev=1
v9: See https://patchwork.freedesktop.org/patch/577441/?series=128993&rev=2
v10: See https://patchwork.freedesktop.org/patch/577943/?series=128993&rev=3
v11: See https://patchwork.freedesktop.org/patch/581239/?series=130705&rev=1
v12: Add last two new patches to enable Xe2+ overrun mode
v13: Update last two patches after code review completion
v14: https://patchwork.freedesktop.org/patch/595447/?series=134028&rev=1
v15: https://patchwork.freedesktop.org/patch/595549/?series=134056&rev=1
v16: https://patchwork.freedesktop.org/patch/597768/?series=134634&rev=1
v17: https://patchwork.freedesktop.org/patch/598236/?series=134742&rev=1
Ashutosh Dixit (17):
drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream
types
drm/xe/perf/uapi: Add perf_stream_paranoid sysctl
drm/xe/oa/uapi: Add OA data formats
drm/xe/oa/uapi: Initialize OA units
drm/xe/oa/uapi: Add/remove OA config perf ops
drm/xe/oa/uapi: Define and parse OA stream properties
drm/xe/oa: OA stream initialization (OAG)
drm/xe/oa/uapi: Expose OA stream fd
drm/xe/oa/uapi: Read file_operation
drm/xe/oa: Add OAR support
drm/xe/oa: Add OAC support
drm/xe/oa/uapi: Query OA unit properties
drm/xe/oa/uapi: OA buffer mmap
drm/xe/oa: Add MMIO trigger support
drm/xe/oa: Override GuC RC with OA on PVC
drm/xe/oa: Changes to OA_TAKEN
drm/xe/oa: Enable Xe2+ overrun mode
drivers/gpu/drm/xe/Makefile | 2 +
.../gpu/drm/xe/instructions/xe_mi_commands.h | 1 +
drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 +
drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +
drivers/gpu/drm/xe/regs/xe_oa_regs.h | 100 +
drivers/gpu/drm/xe/xe_device.c | 17 +-
drivers/gpu/drm/xe/xe_device_types.h | 4 +
drivers/gpu/drm/xe/xe_gt_types.h | 4 +
drivers/gpu/drm/xe/xe_guc_pc.c | 57 +
drivers/gpu/drm/xe/xe_guc_pc.h | 3 +
drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 +
drivers/gpu/drm/xe/xe_lrc.c | 11 +-
drivers/gpu/drm/xe/xe_lrc.h | 1 +
drivers/gpu/drm/xe/xe_module.c | 5 +
drivers/gpu/drm/xe/xe_oa.c | 2423 +++++++++++++++++
drivers/gpu/drm/xe/xe_oa.h | 27 +
drivers/gpu/drm/xe/xe_oa_types.h | 239 ++
drivers/gpu/drm/xe/xe_perf.c | 92 +
drivers/gpu/drm/xe/xe_perf.h | 20 +
drivers/gpu/drm/xe/xe_query.c | 77 +
drivers/gpu/drm/xe/xe_reg_whitelist.c | 24 +-
include/uapi/drm/xe_drm.h | 305 +++
22 files changed, 3412 insertions(+), 7 deletions(-)
create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h
create mode 100644 drivers/gpu/drm/xe/xe_oa.c
create mode 100644 drivers/gpu/drm/xe/xe_oa.h
create mode 100644 drivers/gpu/drm/xe/xe_oa_types.h
create mode 100644 drivers/gpu/drm/xe/xe_perf.c
create mode 100644 drivers/gpu/drm/xe/xe_perf.h
--
2.41.0
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode
2024-06-12 2:05 [PATCH v17 00/17] Add OA functionality to Xe Ashutosh Dixit
@ 2024-06-12 2:05 ` Ashutosh Dixit
0 siblings, 0 replies; 15+ messages in thread
From: Ashutosh Dixit @ 2024-06-12 2:05 UTC (permalink / raw)
To: intel-xe
Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
no partial reports at the end of the buffer, making the OA buffer
effectively a non-power-of-2 size circular buffer whose size, circ_size, is
a multiple of the report size.
v2: Fix implementation of xe_oa_circ_diff/xe_oa_circ_incr (Umesh)
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 35 ++++++++++++++++++++++++--------
drivers/gpu/drm/xe/xe_oa_types.h | 3 +++
2 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 992170b2945d..a1dbece4b848 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -114,7 +114,14 @@ static const struct xe_oa_format oa_formats[] = {
static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
{
- return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
+ return tail >= head ? tail - head :
+ tail + stream->oa_buffer.circ_size - head;
+}
+
+static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
+{
+ return ptr + n >= stream->oa_buffer.circ_size ?
+ ptr + n - stream->oa_buffer.circ_size : ptr + n;
}
static void xe_oa_config_release(struct kref *ref)
@@ -288,7 +295,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
report_size_partial = oa_buf_end - report;
if (report_size_partial < report_size) {
@@ -314,7 +321,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
- u32 mask = (XE_OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
u32 head, tail;
@@ -325,21 +331,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
tail = stream->oa_buffer.tail;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
- xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE);
+ xe_assert(stream->oa->xe,
+ head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
- for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) {
+ for (; xe_oa_circ_diff(stream, tail, head);
+ head = xe_oa_circ_incr(stream, head, report_size)) {
u8 *report = oa_buf_base + head;
ret = xe_oa_append_report(stream, buf, count, offset, report);
if (ret)
break;
- if (is_power_of_2(report_size)) {
+ if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
oa_report_id_clear(stream, (void *)report);
oa_timestamp_clear(stream, (void *)report);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
u32 part = oa_buf_end - report;
/* Zero out the entire report */
@@ -377,7 +385,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0;
-
/*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit".
@@ -1300,6 +1307,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->periodic = param->period_exponent > 0;
stream->period_exponent = param->period_exponent;
+ /*
+ * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
+ * of buffer, making the OA buffer effectively a non-power-of-2 size circular
+ * buffer whose size, circ_size, is a multiple of the report size
+ */
+ if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
+ stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
+ stream->oa_buffer.circ_size =
+ XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
+ else
+ stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
+
if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
/* If we don't find the context offset, just return error */
ret = xe_oa_set_ctx_ctrl_offset(stream);
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 0981f0e57676..706d45577dae 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -170,6 +170,9 @@ struct xe_oa_buffer {
/** @tail: The last verified cached tail where HW has completed writing */
u32 tail;
+
+ /** @circ_size: The effective circular buffer size, for Xe2+ */
+ u32 circ_size;
};
/**
--
2.41.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH v16 00/17] Add OA functionality to Xe
@ 2024-06-07 20:43 Ashutosh Dixit
2024-06-07 20:43 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
0 siblings, 1 reply; 15+ messages in thread
From: Ashutosh Dixit @ 2024-06-07 20:43 UTC (permalink / raw)
To: intel-xe
Please see cover letter for v7 here:
https://patchwork.freedesktop.org/series/121084/#rev7
For changes in later versions see changelog below.
This series is also available at:
https://gitlab.freedesktop.org/adixit/kernel/-/tree/xe-oa
The series has been tested against this IGT series:
https://gitlab.freedesktop.org/adixit/igt-gpu-tools/-/tree/xe-oa
or,
https://patchwork.freedesktop.org/series/130033/
Opensource consumers using uapi in this series:
gpuvis:
https://github.com/mikesart/gpuvis/pull/86 (merged)
mesa:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29312
Test-with: 20240607200847.1964629-1-ashutosh.dixit@intel.com
v2: Fix build
v3: Rebase, due to s/xe_engine/xe_exec_queue/
v4: Re-run for testing
v5: Address review comments, new patches 11 through 17
v6: New patches 18 through 21
v7: Patches are completely redone and don't start with i915 version of the uapi
v8: See https://patchwork.freedesktop.org/patch/575214/?series=128993&rev=1
v9: See https://patchwork.freedesktop.org/patch/577441/?series=128993&rev=2
v10: See https://patchwork.freedesktop.org/patch/577943/?series=128993&rev=3
v11: See https://patchwork.freedesktop.org/patch/581239/?series=130705&rev=1
v12: Add last two new patches to enable Xe2+ overrun mode
v13: Update last two patches after code review completion
v14: https://patchwork.freedesktop.org/patch/595447/?series=134028&rev=1
v15: https://patchwork.freedesktop.org/patch/595549/?series=134056&rev=1
v16: https://patchwork.freedesktop.org/patch/597768/?series=134634&rev=1
Ashutosh Dixit (17):
drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream
types
drm/xe/perf/uapi: Add perf_stream_paranoid sysctl
drm/xe/oa/uapi: Add OA data formats
drm/xe/oa/uapi: Initialize OA units
drm/xe/oa/uapi: Add/remove OA config perf ops
drm/xe/oa/uapi: Define and parse OA stream properties
drm/xe/oa: OA stream initialization (OAG)
drm/xe/oa/uapi: Expose OA stream fd
drm/xe/oa/uapi: Read file_operation
drm/xe/oa: Add OAR support
drm/xe/oa: Add OAC support
drm/xe/oa/uapi: Query OA unit properties
drm/xe/oa/uapi: OA buffer mmap
drm/xe/oa: Add MMIO trigger support
drm/xe/oa: Override GuC RC with OA on PVC
drm/xe/oa: Changes to OA_TAKEN
drm/xe/oa: Enable Xe2+ overrun mode
drivers/gpu/drm/xe/Makefile | 2 +
.../gpu/drm/xe/instructions/xe_mi_commands.h | 3 +
drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 +
drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +
drivers/gpu/drm/xe/regs/xe_oa_regs.h | 103 +
drivers/gpu/drm/xe/xe_device.c | 19 +-
drivers/gpu/drm/xe/xe_device_types.h | 4 +
drivers/gpu/drm/xe/xe_gt_types.h | 4 +
drivers/gpu/drm/xe/xe_guc_pc.c | 57 +
drivers/gpu/drm/xe/xe_guc_pc.h | 3 +
drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 +
drivers/gpu/drm/xe/xe_lrc.c | 11 +-
drivers/gpu/drm/xe/xe_lrc.h | 1 +
drivers/gpu/drm/xe/xe_module.c | 6 +
drivers/gpu/drm/xe/xe_oa.c | 2370 +++++++++++++++++
drivers/gpu/drm/xe/xe_oa.h | 27 +
drivers/gpu/drm/xe/xe_oa_types.h | 232 ++
drivers/gpu/drm/xe/xe_perf.c | 67 +
drivers/gpu/drm/xe/xe_perf.h | 20 +
drivers/gpu/drm/xe/xe_query.c | 77 +
drivers/gpu/drm/xe/xe_reg_whitelist.c | 24 +-
include/uapi/drm/xe_drm.h | 291 ++
22 files changed, 3321 insertions(+), 7 deletions(-)
create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h
create mode 100644 drivers/gpu/drm/xe/xe_oa.c
create mode 100644 drivers/gpu/drm/xe/xe_oa.h
create mode 100644 drivers/gpu/drm/xe/xe_oa_types.h
create mode 100644 drivers/gpu/drm/xe/xe_perf.c
create mode 100644 drivers/gpu/drm/xe/xe_perf.h
--
2.41.0
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode
2024-06-07 20:43 [PATCH v16 00/17] Add OA functionality to Xe Ashutosh Dixit
@ 2024-06-07 20:43 ` Ashutosh Dixit
0 siblings, 0 replies; 15+ messages in thread
From: Ashutosh Dixit @ 2024-06-07 20:43 UTC (permalink / raw)
To: intel-xe
Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
no partial reports at the end of buffer, making the OA buffer effectively a
non-power-of-2 size circular buffer whose size, circ_size, is a multiple of
the report size.
v2: Fix implementation of xe_oa_circ_diff/xe_oa_circ_incr (Umesh)
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 35 ++++++++++++++++++++++++--------
drivers/gpu/drm/xe/xe_oa_types.h | 3 +++
2 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index f4531e2d4de6..d0a46485571a 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -111,7 +111,14 @@ static const struct xe_oa_format oa_formats[] = {
static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
{
- return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
+ return tail >= head ? tail - head :
+ tail + stream->oa_buffer.circ_size - head;
+}
+
+static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
+{
+ return ptr + n >= stream->oa_buffer.circ_size ?
+ ptr + n - stream->oa_buffer.circ_size : ptr + n;
}
static void xe_oa_config_release(struct kref *ref)
@@ -285,7 +292,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
report_size_partial = oa_buf_end - report;
if (report_size_partial < report_size) {
@@ -311,7 +318,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
- u32 mask = (XE_OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
u32 head, tail;
@@ -322,21 +328,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
tail = stream->oa_buffer.tail;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
- xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE);
+ xe_assert(stream->oa->xe,
+ head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
- for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) {
+ for (; xe_oa_circ_diff(stream, tail, head);
+ head = xe_oa_circ_incr(stream, head, report_size)) {
u8 *report = oa_buf_base + head;
ret = xe_oa_append_report(stream, buf, count, offset, report);
if (ret)
break;
- if (is_power_of_2(report_size)) {
+ if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
oa_report_id_clear(stream, (void *)report);
oa_timestamp_clear(stream, (void *)report);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
u32 part = oa_buf_end - report;
/* Zero out the entire report */
@@ -374,7 +382,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0;
-
/*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit".
@@ -1294,6 +1301,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->periodic = param->period_exponent > 0;
stream->period_exponent = param->period_exponent;
+ /*
+ * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
+ * of buffer, making the OA buffer effectively a non-power-of-2 size circular
+ * buffer whose size, circ_size, is a multiple of the report size
+ */
+ if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
+ stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
+ stream->oa_buffer.circ_size =
+ XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
+ else
+ stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
+
if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
/* If we don't find the context offset, just return error */
ret = xe_oa_set_ctx_ctrl_offset(stream);
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 7775fe91616f..c62811482934 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -163,6 +163,9 @@ struct xe_oa_buffer {
/** @tail: The last verified cached tail where HW has completed writing */
u32 tail;
+
+ /** @circ_size: The effective circular buffer size, for Xe2+ */
+ u32 circ_size;
};
/**
--
2.41.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH v15 00/17] Add OA functionality to Xe
@ 2024-05-27 1:43 Ashutosh Dixit
2024-05-27 1:43 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
0 siblings, 1 reply; 15+ messages in thread
From: Ashutosh Dixit @ 2024-05-27 1:43 UTC (permalink / raw)
To: intel-xe
Please see cover letter for v7 here:
https://patchwork.freedesktop.org/series/121084/#rev7
For changes in later versions see changelog below.
This series is also available at:
https://gitlab.freedesktop.org/adixit/kernel/-/tree/xe-oa
The series has been tested against this IGT series:
https://gitlab.freedesktop.org/adixit/igt-gpu-tools/-/tree/xe-oa
or,
https://patchwork.freedesktop.org/series/130033/
Opensource consumers using uapi in this series:
gpuvis:
https://github.com/mikesart/gpuvis/pull/86
mesa:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29312
Test-with: 20240524031439.106332-1-ashutosh.dixit@intel.com
v2: Fix build
v3: Rebase, due to s/xe_engine/xe_exec_queue/
v4: Re-run for testing
v5: Address review comments, new patches 11 through 17
v6: New patches 18 through 21
v7: Patches are completely redone and don't start with i915 version of the uapi
v8: See https://patchwork.freedesktop.org/patch/575214/?series=128993&rev=1
v9: See https://patchwork.freedesktop.org/patch/577441/?series=128993&rev=2
v10: See https://patchwork.freedesktop.org/patch/577943/?series=128993&rev=3
v11: See https://patchwork.freedesktop.org/patch/581239/?series=130705&rev=1
v12: Add last two new patches to enable Xe2+ overrun mode
v13: Update last two patches after code review completion
v14: https://patchwork.freedesktop.org/patch/595447/?series=134028&rev=1
v15: https://patchwork.freedesktop.org/patch/595549/?series=134056&rev=1
Ashutosh Dixit (17):
drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream
types
drm/xe/perf/uapi: Add perf_stream_paranoid sysctl
drm/xe/oa/uapi: Add OA data formats
drm/xe/oa/uapi: Initialize OA units
drm/xe/oa/uapi: Add/remove OA config perf ops
drm/xe/oa/uapi: Define and parse OA stream properties
drm/xe/oa: OA stream initialization (OAG)
drm/xe/oa/uapi: Expose OA stream fd
drm/xe/oa/uapi: Read file_operation
drm/xe/oa: Add OAR support
drm/xe/oa: Add OAC support
drm/xe/oa/uapi: Query OA unit properties
drm/xe/oa/uapi: OA buffer mmap
drm/xe/oa: Add MMIO trigger support
drm/xe/oa: Override GuC RC with OA on PVC
drm/xe/oa: Changes to OA_TAKEN
drm/xe/oa: Enable Xe2+ overrun mode
drivers/gpu/drm/xe/Makefile | 2 +
.../gpu/drm/xe/instructions/xe_mi_commands.h | 3 +
drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 +
drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +
drivers/gpu/drm/xe/regs/xe_oa_regs.h | 103 +
drivers/gpu/drm/xe/xe_device.c | 19 +-
drivers/gpu/drm/xe/xe_device_types.h | 4 +
drivers/gpu/drm/xe/xe_gt_types.h | 4 +
drivers/gpu/drm/xe/xe_guc_pc.c | 57 +
drivers/gpu/drm/xe/xe_guc_pc.h | 3 +
drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 +
drivers/gpu/drm/xe/xe_lrc.c | 11 +-
drivers/gpu/drm/xe/xe_lrc.h | 1 +
drivers/gpu/drm/xe/xe_module.c | 6 +
drivers/gpu/drm/xe/xe_oa.c | 2370 +++++++++++++++++
drivers/gpu/drm/xe/xe_oa.h | 27 +
drivers/gpu/drm/xe/xe_oa_types.h | 232 ++
drivers/gpu/drm/xe/xe_perf.c | 67 +
drivers/gpu/drm/xe/xe_perf.h | 20 +
drivers/gpu/drm/xe/xe_query.c | 77 +
drivers/gpu/drm/xe/xe_reg_whitelist.c | 24 +-
include/uapi/drm/xe_drm.h | 291 ++
22 files changed, 3321 insertions(+), 7 deletions(-)
create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h
create mode 100644 drivers/gpu/drm/xe/xe_oa.c
create mode 100644 drivers/gpu/drm/xe/xe_oa.h
create mode 100644 drivers/gpu/drm/xe/xe_oa_types.h
create mode 100644 drivers/gpu/drm/xe/xe_perf.c
create mode 100644 drivers/gpu/drm/xe/xe_perf.h
--
2.41.0
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode
2024-05-27 1:43 [PATCH v15 00/17] Add OA functionality to Xe Ashutosh Dixit
@ 2024-05-27 1:43 ` Ashutosh Dixit
0 siblings, 0 replies; 15+ messages in thread
From: Ashutosh Dixit @ 2024-05-27 1:43 UTC (permalink / raw)
To: intel-xe
Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
no partial reports at the end of buffer, making the OA buffer effectively a
non-power-of-2 size circular buffer whose size, circ_size, is a multiple of
the report size.
v2: Fix implementation of xe_oa_circ_diff/xe_oa_circ_incr (Umesh)
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 35 ++++++++++++++++++++++++--------
drivers/gpu/drm/xe/xe_oa_types.h | 3 +++
2 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 5ef4c4abea32..3ed4ca19f9cd 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -111,7 +111,14 @@ static const struct xe_oa_format oa_formats[] = {
static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
{
- return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
+ return tail >= head ? tail - head :
+ tail + stream->oa_buffer.circ_size - head;
+}
+
+static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
+{
+ return ptr + n >= stream->oa_buffer.circ_size ?
+ ptr + n - stream->oa_buffer.circ_size : ptr + n;
}
static void xe_oa_config_release(struct kref *ref)
@@ -285,7 +292,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
report_size_partial = oa_buf_end - report;
if (report_size_partial < report_size) {
@@ -311,7 +318,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
- u32 mask = (XE_OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
u32 head, tail;
@@ -322,21 +328,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
tail = stream->oa_buffer.tail;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
- xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE);
+ xe_assert(stream->oa->xe,
+ head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
- for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) {
+ for (; xe_oa_circ_diff(stream, tail, head);
+ head = xe_oa_circ_incr(stream, head, report_size)) {
u8 *report = oa_buf_base + head;
ret = xe_oa_append_report(stream, buf, count, offset, report);
if (ret)
break;
- if (is_power_of_2(report_size)) {
+ if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
oa_report_id_clear(stream, (void *)report);
oa_timestamp_clear(stream, (void *)report);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
u32 part = oa_buf_end - report;
/* Zero out the entire report */
@@ -374,7 +382,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0;
-
/*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit".
@@ -1294,6 +1301,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->periodic = param->period_exponent > 0;
stream->period_exponent = param->period_exponent;
+ /*
+ * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
+ * of buffer, making the OA buffer effectively a non-power-of-2 size circular
+ * buffer whose size, circ_size, is a multiple of the report size
+ */
+ if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
+ stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
+ stream->oa_buffer.circ_size =
+ XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
+ else
+ stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
+
if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
/* If we don't find the context offset, just return error */
ret = xe_oa_set_ctx_ctrl_offset(stream);
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 7775fe91616f..c62811482934 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -163,6 +163,9 @@ struct xe_oa_buffer {
/** @tail: The last verified cached tail where HW has completed writing */
u32 tail;
+
+ /** @circ_size: The effective circular buffer size, for Xe2+ */
+ u32 circ_size;
};
/**
--
2.41.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH v14 00/17] Add OA functionality to Xe
@ 2024-05-24 19:01 Ashutosh Dixit
2024-05-24 19:01 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
0 siblings, 1 reply; 15+ messages in thread
From: Ashutosh Dixit @ 2024-05-24 19:01 UTC (permalink / raw)
To: intel-xe
Please see cover letter for v7 here:
https://patchwork.freedesktop.org/series/121084/#rev7
For changes in later versions see changelog below.
This series is also available at:
https://gitlab.freedesktop.org/adixit/kernel/-/tree/xe-oa
The series has been tested against this IGT series:
https://gitlab.freedesktop.org/adixit/igt-gpu-tools/-/tree/xe-oa
or,
https://patchwork.freedesktop.org/series/130033/
Opensource consumers using uapi in this series:
gpuvis:
https://github.com/mikesart/gpuvis/pull/86
mesa:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29312
Test-with: 20240524031439.106332-1-ashutosh.dixit@intel.com
v2: Fix build
v3: Rebase, due to s/xe_engine/xe_exec_queue/
v4: Re-run for testing
v5: Address review comments, new patches 11 through 17
v6: New patches 18 through 21
v7: Patches are completely redone and don't start with i915 version of the uapi
v8: See https://patchwork.freedesktop.org/patch/575214/?series=128993&rev=1
v9: See https://patchwork.freedesktop.org/patch/577441/?series=128993&rev=2
v10: See https://patchwork.freedesktop.org/patch/577943/?series=128993&rev=3
v11: See https://patchwork.freedesktop.org/patch/581239/?series=130705&rev=1
v12: Add last two new patches to enable Xe2+ overrun mode
v13: Update last two patches after code review completion
v14: https://patchwork.freedesktop.org/patch/595447/?series=134028&rev=1
Ashutosh Dixit (17):
drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream
types
drm/xe/perf/uapi: Add perf_stream_paranoid sysctl
drm/xe/oa/uapi: Add OA data formats
drm/xe/oa/uapi: Initialize OA units
drm/xe/oa/uapi: Add/remove OA config perf ops
drm/xe/oa/uapi: Define and parse OA stream properties
drm/xe/oa: OA stream initialization (OAG)
drm/xe/oa/uapi: Expose OA stream fd
drm/xe/oa/uapi: Read file_operation
drm/xe/oa: Add OAR support
drm/xe/oa: Add OAC support
drm/xe/oa/uapi: Query OA unit properties
drm/xe/oa/uapi: OA buffer mmap
drm/xe/oa: Add MMIO trigger support
drm/xe/oa: Override GuC RC with OA on PVC
drm/xe/oa: Changes to OA_TAKEN
drm/xe/oa: Enable Xe2+ overrun mode
drivers/gpu/drm/xe/Makefile | 2 +
.../gpu/drm/xe/instructions/xe_mi_commands.h | 3 +
drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 +
drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +
drivers/gpu/drm/xe/regs/xe_oa_regs.h | 103 +
drivers/gpu/drm/xe/xe_device.c | 19 +-
drivers/gpu/drm/xe/xe_device_types.h | 4 +
drivers/gpu/drm/xe/xe_gt_types.h | 4 +
drivers/gpu/drm/xe/xe_guc_pc.c | 57 +
drivers/gpu/drm/xe/xe_guc_pc.h | 3 +
drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 +
drivers/gpu/drm/xe/xe_lrc.c | 11 +-
drivers/gpu/drm/xe/xe_lrc.h | 1 +
drivers/gpu/drm/xe/xe_module.c | 6 +
drivers/gpu/drm/xe/xe_oa.c | 2372 +++++++++++++++++
drivers/gpu/drm/xe/xe_oa.h | 30 +
drivers/gpu/drm/xe/xe_oa_types.h | 232 ++
drivers/gpu/drm/xe/xe_perf.c | 67 +
drivers/gpu/drm/xe/xe_perf.h | 20 +
drivers/gpu/drm/xe/xe_query.c | 77 +
drivers/gpu/drm/xe/xe_reg_whitelist.c | 24 +-
include/uapi/drm/xe_drm.h | 291 ++
22 files changed, 3326 insertions(+), 7 deletions(-)
create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h
create mode 100644 drivers/gpu/drm/xe/xe_oa.c
create mode 100644 drivers/gpu/drm/xe/xe_oa.h
create mode 100644 drivers/gpu/drm/xe/xe_oa_types.h
create mode 100644 drivers/gpu/drm/xe/xe_perf.c
create mode 100644 drivers/gpu/drm/xe/xe_perf.h
--
2.41.0
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode
2024-05-24 19:01 [PATCH v14 00/17] Add OA functionality to Xe Ashutosh Dixit
@ 2024-05-24 19:01 ` Ashutosh Dixit
0 siblings, 0 replies; 15+ messages in thread
From: Ashutosh Dixit @ 2024-05-24 19:01 UTC (permalink / raw)
To: intel-xe
Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
no partial reports at the end of buffer, making the OA buffer effectively a
non-power-of-2 size circular buffer whose size, circ_size, is a multiple of
the report size.
v2: Fix implementation of xe_oa_circ_diff/xe_oa_circ_incr (Umesh)
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 35 ++++++++++++++++++++++++--------
drivers/gpu/drm/xe/xe_oa_types.h | 3 +++
2 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 5714ea4d5f12..38e6364a60b1 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -111,7 +111,14 @@ static const struct xe_oa_format oa_formats[] = {
static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
{
- return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
+ return tail >= head ? tail - head :
+ tail + stream->oa_buffer.circ_size - head;
+}
+
+static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
+{
+ return ptr + n >= stream->oa_buffer.circ_size ?
+ ptr + n - stream->oa_buffer.circ_size : ptr + n;
}
static void xe_oa_config_release(struct kref *ref)
@@ -285,7 +292,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
report_size_partial = oa_buf_end - report;
if (report_size_partial < report_size) {
@@ -311,7 +318,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
- u32 mask = (XE_OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
u32 head, tail;
@@ -322,21 +328,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
tail = stream->oa_buffer.tail;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
- xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE);
+ xe_assert(stream->oa->xe,
+ head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
- for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) {
+ for (; xe_oa_circ_diff(stream, tail, head);
+ head = xe_oa_circ_incr(stream, head, report_size)) {
u8 *report = oa_buf_base + head;
ret = xe_oa_append_report(stream, buf, count, offset, report);
if (ret)
break;
- if (is_power_of_2(report_size)) {
+ if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
oa_report_id_clear(stream, (void *)report);
oa_timestamp_clear(stream, (void *)report);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
u32 part = oa_buf_end - report;
/* Zero out the entire report */
@@ -374,7 +382,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0;
-
/*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit".
@@ -1294,6 +1301,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->periodic = param->period_exponent > 0;
stream->period_exponent = param->period_exponent;
+ /*
+ * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
+ * of buffer, making the OA buffer effectively a non-power-of-2 size circular
+ * buffer whose size, circ_size, is a multiple of the report size
+ */
+ if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
+ stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
+ stream->oa_buffer.circ_size =
+ XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
+ else
+ stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
+
if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
/* If we don't find the context offset, just return error */
ret = xe_oa_set_ctx_ctrl_offset(stream);
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 7775fe91616f..c62811482934 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -163,6 +163,9 @@ struct xe_oa_buffer {
/** @tail: The last verified cached tail where HW has completed writing */
u32 tail;
+
+ /** @circ_size: The effective circular buffer size, for Xe2+ */
+ u32 circ_size;
};
/**
--
2.41.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 00/17] Add OA functionality to Xe
@ 2024-03-15 1:35 Ashutosh Dixit
2024-03-15 1:35 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
0 siblings, 1 reply; 15+ messages in thread
From: Ashutosh Dixit @ 2024-03-15 1:35 UTC (permalink / raw)
To: intel-xe
Please see cover letter for v7 here:
https://patchwork.freedesktop.org/series/121084/#rev7
For changes in v8 through v10, see:
https://patchwork.freedesktop.org/series/128993/
For changes in v11, see:
https://patchwork.freedesktop.org/series/130705/
This series is also available at:
https://gitlab.freedesktop.org/adixit/kernel/-/tree/xe-oa
The series has been tested against this IGT series:
https://gitlab.freedesktop.org/adixit/igt-gpu-tools/-/tree/xe-oa, or,
https://patchwork.freedesktop.org/series/130033/
v2: Fix build
v3: Rebase, due to s/xe_engine/xe_exec_queue/
v4: Re-run for testing
v5: Address review comments, new patches 11 through 17
v6: New patches 18 through 21
v7: Patches are completely redone and don't start with i915 version of the uapi
v8: See https://patchwork.freedesktop.org/patch/575214/?series=128993&rev=1
v9: See https://patchwork.freedesktop.org/patch/577441/?series=128993&rev=2
v10: See https://patchwork.freedesktop.org/patch/577943/?series=128993&rev=3
v11: See https://patchwork.freedesktop.org/patch/581239/?series=130705&rev=1
v12: Add last two new patches to enable Xe2+ overrun mode
v13: Update last two patches after code review completion
Ashutosh Dixit (17):
drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream
types
drm/xe/perf/uapi: Add perf_stream_paranoid sysctl
drm/xe/oa/uapi: Add OA data formats
drm/xe/oa/uapi: Initialize OA units
drm/xe/oa/uapi: Add/remove OA config perf ops
drm/xe/oa/uapi: Define and parse OA stream properties
drm/xe/oa: OA stream initialization (OAG)
drm/xe/oa/uapi: Expose OA stream fd
drm/xe/oa/uapi: Read file_operation
drm/xe/oa: Add OAR support
drm/xe/oa: Add OAC support
drm/xe/oa/uapi: Query OA unit properties
drm/xe/oa/uapi: OA buffer mmap
drm/xe/oa: Add MMIO trigger support
drm/xe/oa: Override GuC RC with OA on PVC
drm/xe/oa: Changes to OA_TAKEN
drm/xe/oa: Enable Xe2+ overrun mode
drivers/gpu/drm/xe/Makefile | 2 +
.../gpu/drm/xe/instructions/xe_mi_commands.h | 3 +
drivers/gpu/drm/xe/regs/xe_engine_regs.h | 4 +-
drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +
drivers/gpu/drm/xe/regs/xe_oa_regs.h | 99 +
drivers/gpu/drm/xe/xe_device.c | 18 +-
drivers/gpu/drm/xe/xe_device_types.h | 4 +
drivers/gpu/drm/xe/xe_gt_types.h | 4 +
drivers/gpu/drm/xe/xe_guc_pc.c | 56 +
drivers/gpu/drm/xe/xe_guc_pc.h | 3 +
drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 +
drivers/gpu/drm/xe/xe_lrc.c | 11 +-
drivers/gpu/drm/xe/xe_lrc.h | 1 +
drivers/gpu/drm/xe/xe_module.c | 6 +
drivers/gpu/drm/xe/xe_oa.c | 2334 +++++++++++++++++
drivers/gpu/drm/xe/xe_oa.h | 30 +
drivers/gpu/drm/xe/xe_oa_types.h | 229 ++
drivers/gpu/drm/xe/xe_perf.c | 67 +
drivers/gpu/drm/xe/xe_perf.h | 20 +
drivers/gpu/drm/xe/xe_query.c | 77 +
drivers/gpu/drm/xe/xe_reg_whitelist.c | 24 +-
include/uapi/drm/xe_drm.h | 286 ++
22 files changed, 3275 insertions(+), 8 deletions(-)
create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h
create mode 100644 drivers/gpu/drm/xe/xe_oa.c
create mode 100644 drivers/gpu/drm/xe/xe_oa.h
create mode 100644 drivers/gpu/drm/xe/xe_oa_types.h
create mode 100644 drivers/gpu/drm/xe/xe_perf.c
create mode 100644 drivers/gpu/drm/xe/xe_perf.h
--
2.41.0
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode
2024-03-15 1:35 [PATCH 00/17] Add OA functionality to Xe Ashutosh Dixit
@ 2024-03-15 1:35 ` Ashutosh Dixit
0 siblings, 0 replies; 15+ messages in thread
From: Ashutosh Dixit @ 2024-03-15 1:35 UTC (permalink / raw)
To: intel-xe
Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
no partial reports at the end of buffer, making the OA buffer effectively a
non-power-of-2 size circular buffer whose size, circ_size, is a multiple of
the report size.
v2: Fix implementation of xe_oa_circ_diff/xe_oa_circ_incr (Umesh)
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 35 ++++++++++++++++++++++++--------
drivers/gpu/drm/xe/xe_oa_types.h | 3 +++
2 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6f5bbb0787d9..1ad17cc14532 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -106,7 +106,14 @@ static const struct xe_oa_format oa_formats[] = {
static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
{
- return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
+ return tail >= head ? tail - head :
+ tail + stream->oa_buffer.circ_size - head;
+}
+
+static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
+{
+ return ptr + n >= stream->oa_buffer.circ_size ?
+ ptr + n - stream->oa_buffer.circ_size : ptr + n;
}
static void xe_oa_config_release(struct kref *ref)
@@ -280,7 +287,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
report_size_partial = oa_buf_end - report;
if (report_size_partial < report_size) {
@@ -306,7 +313,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
- u32 mask = (XE_OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
u32 head, tail;
@@ -317,21 +323,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
tail = stream->oa_buffer.tail;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
- xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE);
+ xe_assert(stream->oa->xe,
+ head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
- for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) {
+ for (; xe_oa_circ_diff(stream, tail, head);
+ head = xe_oa_circ_incr(stream, head, report_size)) {
u8 *report = oa_buf_base + head;
ret = xe_oa_append_report(stream, buf, count, offset, report);
if (ret)
break;
- if (is_power_of_2(report_size)) {
+ if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
oa_report_id_clear(stream, (void *)report);
oa_timestamp_clear(stream, (void *)report);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
u32 part = oa_buf_end - report;
/* Zero out the entire report */
@@ -369,7 +377,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0;
-
/*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit".
@@ -1256,6 +1263,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->periodic = param->period_exponent > 0;
stream->period_exponent = param->period_exponent;
+ /*
+ * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
+ * of buffer, making the OA buffer effectively a non-power-of-2 size circular
+ * buffer whose size, circ_size, is a multiple of the report size
+ */
+ if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
+ stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
+ stream->oa_buffer.circ_size =
+ XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
+ else
+ stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
+
if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
/* If we don't find the context offset, just return error */
ret = xe_oa_set_ctx_ctrl_offset(stream);
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 6984e7d04be5..d8d5c9d8c22e 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -163,6 +163,9 @@ struct xe_oa_buffer {
/** @tail: The last verified cached tail where HW has completed writing */
u32 tail;
+
+ /** @circ_size: The effective circular buffer size, for Xe2+ */
+ u32 circ_size;
};
/**
--
2.41.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode
@ 2024-03-14 3:25 Ashutosh Dixit
2024-03-15 0:27 ` Umesh Nerlige Ramappa
0 siblings, 1 reply; 15+ messages in thread
From: Ashutosh Dixit @ 2024-03-14 3:25 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
no partial reports at the end of buffer, making the OA buffer effectively a
non-power-of-2 size circular buffer whose size, circ_size, is a multiple of
the report size.
v2: Fix implementation of xe_oa_circ_diff/xe_oa_circ_incr (Umesh)
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 35 ++++++++++++++++++++++++--------
drivers/gpu/drm/xe/xe_oa_types.h | 3 +++
2 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6f5bbb0787d9..1ad17cc14532 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -106,7 +106,14 @@ static const struct xe_oa_format oa_formats[] = {
static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
{
- return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
+ return tail >= head ? tail - head :
+ tail + stream->oa_buffer.circ_size - head;
+}
+
+static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
+{
+ return ptr + n >= stream->oa_buffer.circ_size ?
+ ptr + n - stream->oa_buffer.circ_size : ptr + n;
}
static void xe_oa_config_release(struct kref *ref)
@@ -280,7 +287,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
report_size_partial = oa_buf_end - report;
if (report_size_partial < report_size) {
@@ -306,7 +313,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
- u32 mask = (XE_OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
u32 head, tail;
@@ -317,21 +323,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
tail = stream->oa_buffer.tail;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
- xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE);
+ xe_assert(stream->oa->xe,
+ head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
- for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) {
+ for (; xe_oa_circ_diff(stream, tail, head);
+ head = xe_oa_circ_incr(stream, head, report_size)) {
u8 *report = oa_buf_base + head;
ret = xe_oa_append_report(stream, buf, count, offset, report);
if (ret)
break;
- if (is_power_of_2(report_size)) {
+ if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
oa_report_id_clear(stream, (void *)report);
oa_timestamp_clear(stream, (void *)report);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
u32 part = oa_buf_end - report;
/* Zero out the entire report */
@@ -369,7 +377,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0;
-
/*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit".
@@ -1256,6 +1263,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->periodic = param->period_exponent > 0;
stream->period_exponent = param->period_exponent;
+ /*
+ * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
+ * of buffer, making the OA buffer effectively a non-power-of-2 size circular
+ * buffer whose size, circ_size, is a multiple of the report size
+ */
+ if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
+ stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
+ stream->oa_buffer.circ_size =
+ XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
+ else
+ stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
+
if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
/* If we don't find the context offset, just return error */
ret = xe_oa_set_ctx_ctrl_offset(stream);
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 6984e7d04be5..d8d5c9d8c22e 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -163,6 +163,9 @@ struct xe_oa_buffer {
/** @tail: The last verified cached tail where HW has completed writing */
u32 tail;
+
+ /** @circ_size: The effective circular buffer size, for Xe2+ */
+ u32 circ_size;
};
/**
--
2.41.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode
2024-03-14 3:25 Ashutosh Dixit
@ 2024-03-15 0:27 ` Umesh Nerlige Ramappa
0 siblings, 0 replies; 15+ messages in thread
From: Umesh Nerlige Ramappa @ 2024-03-15 0:27 UTC (permalink / raw)
To: Ashutosh Dixit; +Cc: intel-xe
On Wed, Mar 13, 2024 at 08:25:32PM -0700, Ashutosh Dixit wrote:
>Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
>no partial reports at the end of buffer, making the OA buffer effectively a
>non-power-of-2 size circular buffer whose size, circ_size, is a multiple of
>the report size.
>
>v2: Fix implementation of xe_oa_circ_diff/xe_oa_circ_incr (Umesh)
>
>Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Thanks,
Umesh
>---
> drivers/gpu/drm/xe/xe_oa.c | 35 ++++++++++++++++++++++++--------
> drivers/gpu/drm/xe/xe_oa_types.h | 3 +++
> 2 files changed, 30 insertions(+), 8 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
>index 6f5bbb0787d9..1ad17cc14532 100644
>--- a/drivers/gpu/drm/xe/xe_oa.c
>+++ b/drivers/gpu/drm/xe/xe_oa.c
>@@ -106,7 +106,14 @@ static const struct xe_oa_format oa_formats[] = {
>
> static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
> {
>- return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
>+ return tail >= head ? tail - head :
>+ tail + stream->oa_buffer.circ_size - head;
>+}
>+
>+static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
>+{
>+ return ptr + n >= stream->oa_buffer.circ_size ?
>+ ptr + n - stream->oa_buffer.circ_size : ptr + n;
> }
>
> static void xe_oa_config_release(struct kref *ref)
>@@ -280,7 +287,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
>
> buf += *offset;
>
>- oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
>+ oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
> report_size_partial = oa_buf_end - report;
>
> if (report_size_partial < report_size) {
>@@ -306,7 +313,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> int report_size = stream->oa_buffer.format->size;
> u8 *oa_buf_base = stream->oa_buffer.vaddr;
> u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
>- u32 mask = (XE_OA_BUFFER_SIZE - 1);
> size_t start_offset = *offset;
> unsigned long flags;
> u32 head, tail;
>@@ -317,21 +323,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> tail = stream->oa_buffer.tail;
> spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
>- xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE);
>+ xe_assert(stream->oa->xe,
>+ head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
>
>- for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) {
>+ for (; xe_oa_circ_diff(stream, tail, head);
>+ head = xe_oa_circ_incr(stream, head, report_size)) {
> u8 *report = oa_buf_base + head;
>
> ret = xe_oa_append_report(stream, buf, count, offset, report);
> if (ret)
> break;
>
>- if (is_power_of_2(report_size)) {
>+ if (!(stream->oa_buffer.circ_size % report_size)) {
> /* Clear out report id and timestamp to detect unlanded reports */
> oa_report_id_clear(stream, (void *)report);
> oa_timestamp_clear(stream, (void *)report);
> } else {
>- u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
>+ u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
> u32 part = oa_buf_end - report;
>
> /* Zero out the entire report */
>@@ -369,7 +377,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
> xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
> gtt_offset & OAG_OAHEADPTR_MASK);
> stream->oa_buffer.head = 0;
>-
> /*
> * PRM says: "This MMIO must be set before the OATAILPTR register and after the
> * OAHEADPTR register. This is to enable proper functionality of the overflow bit".
>@@ -1256,6 +1263,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
> stream->periodic = param->period_exponent > 0;
> stream->period_exponent = param->period_exponent;
>
>+ /*
>+ * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
>+ * of buffer, making the OA buffer effectively a non-power-of-2 size circular
>+ * buffer whose size, circ_size, is a multiple of the report size
>+ */
>+ if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
>+ stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
>+ stream->oa_buffer.circ_size =
>+ XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
>+ else
>+ stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
>+
> if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
> /* If we don't find the context offset, just return error */
> ret = xe_oa_set_ctx_ctrl_offset(stream);
>diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
>index 6984e7d04be5..d8d5c9d8c22e 100644
>--- a/drivers/gpu/drm/xe/xe_oa_types.h
>+++ b/drivers/gpu/drm/xe/xe_oa_types.h
>@@ -163,6 +163,9 @@ struct xe_oa_buffer {
>
> /** @tail: The last verified cached tail where HW has completed writing */
> u32 tail;
>+
>+ /** @circ_size: The effective circular buffer size, for Xe2+ */
>+ u32 circ_size;
> };
>
> /**
>--
>2.41.0
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 16/17] drm/xe/oa: Changes to OA_TAKEN
@ 2024-03-12 3:40 Ashutosh Dixit
2024-03-12 3:40 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
0 siblings, 1 reply; 15+ messages in thread
From: Ashutosh Dixit @ 2024-03-12 3:40 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
Rename OA_TAKEN to xe_oa_circ_diff, since xe_oa_circ_diff better describes
what the macro actually does. Also convert to function and add xe_oa_stream
arg. These will be used in the following patch.
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index b33976db1da5..6f5bbb0787d9 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -28,7 +28,6 @@
#include "xe_sched_job.h"
#include "xe_perf.h"
-#define OA_TAKEN(tail, head) (((tail) - (head)) & (XE_OA_BUFFER_SIZE - 1))
#define DEFAULT_POLL_FREQUENCY_HZ 200
#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
#define XE_OA_UNIT_INVALID U32_MAX
@@ -105,6 +104,11 @@ static const struct xe_oa_format oa_formats[] = {
[XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 },
};
+static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
+{
+ return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
+}
+
static void xe_oa_config_release(struct kref *ref)
{
struct xe_oa_config *oa_config =
@@ -209,11 +213,11 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
* increments. Also report size may not be a power of 2. Compute potential
* partially landed report in OA buffer.
*/
- partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail);
+ partial_report_size = xe_oa_circ_diff(stream, hw_tail, stream->oa_buffer.tail);
partial_report_size %= report_size;
/* Subtract partial amount off the tail */
- hw_tail = OA_TAKEN(hw_tail, partial_report_size);
+ hw_tail = xe_oa_circ_diff(stream, hw_tail, partial_report_size);
tail = hw_tail;
@@ -225,24 +229,24 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
* This is assuming that the writes of the OA unit land in memory in the order
* they were written. If not : (╯°□°)╯︵ ┻━┻
*/
- while (OA_TAKEN(tail, stream->oa_buffer.tail) >= report_size) {
+ while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
void *report = stream->oa_buffer.vaddr + tail;
if (oa_report_id(stream, report) || oa_timestamp(stream, report))
break;
- tail = OA_TAKEN(tail, report_size);
+ tail = xe_oa_circ_diff(stream, tail, report_size);
}
- if (OA_TAKEN(hw_tail, tail) > report_size)
+ if (xe_oa_circ_diff(stream, hw_tail, tail) > report_size)
drm_dbg(&stream->oa->xe->drm,
"unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n",
stream->oa_buffer.head, tail, hw_tail);
stream->oa_buffer.tail = tail;
- pollin = OA_TAKEN(stream->oa_buffer.tail,
- stream->oa_buffer.head) >= report_size;
+ pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail,
+ stream->oa_buffer.head) >= report_size;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -315,7 +319,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE);
- for (; OA_TAKEN(tail, head); head = (head + report_size) & mask) {
+ for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) {
u8 *report = oa_buf_base + head;
ret = xe_oa_append_report(stream, buf, count, offset, report);
--
2.41.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode
2024-03-12 3:40 [PATCH 16/17] drm/xe/oa: Changes to OA_TAKEN Ashutosh Dixit
@ 2024-03-12 3:40 ` Ashutosh Dixit
2024-03-12 20:14 ` Umesh Nerlige Ramappa
0 siblings, 1 reply; 15+ messages in thread
From: Ashutosh Dixit @ 2024-03-12 3:40 UTC (permalink / raw)
To: intel-xe; +Cc: Umesh Nerlige Ramappa
Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
no partial reports at the end of buffer, making the OA buffer effectively a
non-power-of-2 size circular buffer whose size, circ_size, is a multiple of
the report size.
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 36 +++++++++++++++++++++++++-------
drivers/gpu/drm/xe/xe_oa_types.h | 3 +++
2 files changed, 31 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6f5bbb0787d9..6a0d2e229254 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -106,7 +106,15 @@ static const struct xe_oa_format oa_formats[] = {
static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
{
- return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
+ if (stream->oa_buffer.circ_size == XE_OA_BUFFER_SIZE)
+ return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
+ else
+ return (tail - head) % stream->oa_buffer.circ_size;
+}
+
+static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
+{
+ return xe_oa_circ_diff(stream, ptr, -n);
}
static void xe_oa_config_release(struct kref *ref)
@@ -280,7 +288,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
report_size_partial = oa_buf_end - report;
if (report_size_partial < report_size) {
@@ -306,7 +314,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
- u32 mask = (XE_OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
u32 head, tail;
@@ -317,21 +324,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
tail = stream->oa_buffer.tail;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
- xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE);
+ xe_assert(stream->oa->xe,
+ head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
- for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) {
+ for (; xe_oa_circ_diff(stream, tail, head);
+ head = xe_oa_circ_incr(stream, head, report_size)) {
u8 *report = oa_buf_base + head;
ret = xe_oa_append_report(stream, buf, count, offset, report);
if (ret)
break;
- if (is_power_of_2(report_size)) {
+ if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
oa_report_id_clear(stream, (void *)report);
oa_timestamp_clear(stream, (void *)report);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
u32 part = oa_buf_end - report;
/* Zero out the entire report */
@@ -369,7 +378,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0;
-
/*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit".
@@ -1256,6 +1264,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->periodic = param->period_exponent > 0;
stream->period_exponent = param->period_exponent;
+ /*
+ * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
+ * of buffer, making the OA buffer effectively a non-power-of-2 size circular
+ * buffer whose size, circ_size, is a multiple of the report size
+ */
+ if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
+ stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
+ stream->oa_buffer.circ_size =
+ XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
+ else
+ stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
+
if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
/* If we don't find the context offset, just return error */
ret = xe_oa_set_ctx_ctrl_offset(stream);
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 6984e7d04be5..d8d5c9d8c22e 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -163,6 +163,9 @@ struct xe_oa_buffer {
/** @tail: The last verified cached tail where HW has completed writing */
u32 tail;
+
+ /** @circ_size: The effective circular buffer size, for Xe2+ */
+ u32 circ_size;
};
/**
--
2.41.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode
2024-03-12 3:40 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
@ 2024-03-12 20:14 ` Umesh Nerlige Ramappa
2024-03-12 20:17 ` Umesh Nerlige Ramappa
2024-03-14 0:48 ` Dixit, Ashutosh
0 siblings, 2 replies; 15+ messages in thread
From: Umesh Nerlige Ramappa @ 2024-03-12 20:14 UTC (permalink / raw)
To: Ashutosh Dixit; +Cc: intel-xe
On Mon, Mar 11, 2024 at 08:40:03PM -0700, Ashutosh Dixit wrote:
>Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
>no partial reports at the end of buffer, making the OA buffer effectively a
>non-power-of-2 size circular buffer whose size, circ_size, is a multiple of
>the report size.
>
>Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>---
> drivers/gpu/drm/xe/xe_oa.c | 36 +++++++++++++++++++++++++-------
> drivers/gpu/drm/xe/xe_oa_types.h | 3 +++
> 2 files changed, 31 insertions(+), 8 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
>index 6f5bbb0787d9..6a0d2e229254 100644
>--- a/drivers/gpu/drm/xe/xe_oa.c
>+++ b/drivers/gpu/drm/xe/xe_oa.c
>@@ -106,7 +106,15 @@ static const struct xe_oa_format oa_formats[] = {
>
> static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
> {
>- return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
>+ if (stream->oa_buffer.circ_size == XE_OA_BUFFER_SIZE)
>+ return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
>+ else
>+ return (tail - head) % stream->oa_buffer.circ_size;
>+}
For ex: consider a 16 MB buffer with a report size of 384 bytes. At the
end of the buffer, you would have an empty space of 256 bytes (16 MB %
384)
(For ref: 16 MB = 0x1000000, 384 = 0x180)
In this case circ_size = 0xFFFF00
Let's say your head is pointing to 0xFFFD80 and tail is pointing to
0x180 (essentially there is one unread report at the end of the buffer
and one unread report at the beginning of the buffer).
In this case, (tail - head) % stream->oa_buffer.circ_size, is not
calculating the correct size. Should be 0x300, but I am not getting
that. Can you please check/verify?
I am thinking we need something like this (roughly). We don't need the
mod operation.
static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
{
u32 available = (tail - head) & (XE_OA_BUFFER_SIZE - 1);
/* head is always aligned to report size, but tail may not be */
available = available % format_size;
return tail >= head ? available : available - empty_space;
}
>+
>+static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
>+{
>+ return xe_oa_circ_diff(stream, ptr, -n);
> }
>
> static void xe_oa_config_release(struct kref *ref)
>@@ -280,7 +288,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
>
> buf += *offset;
>
>- oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
>+ oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
> report_size_partial = oa_buf_end - report;
>
> if (report_size_partial < report_size) {
>@@ -306,7 +314,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> int report_size = stream->oa_buffer.format->size;
> u8 *oa_buf_base = stream->oa_buffer.vaddr;
> u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
>- u32 mask = (XE_OA_BUFFER_SIZE - 1);
> size_t start_offset = *offset;
> unsigned long flags;
> u32 head, tail;
>@@ -317,21 +324,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
> tail = stream->oa_buffer.tail;
> spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
>- xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE);
>+ xe_assert(stream->oa->xe,
>+ head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
>
>- for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) {
>+ for (; xe_oa_circ_diff(stream, tail, head);
>+ head = xe_oa_circ_incr(stream, head, report_size)) {
> u8 *report = oa_buf_base + head;
>
> ret = xe_oa_append_report(stream, buf, count, offset, report);
> if (ret)
> break;
>
>- if (is_power_of_2(report_size)) {
>+ if (!(stream->oa_buffer.circ_size % report_size)) {
> /* Clear out report id and timestamp to detect unlanded reports */
> oa_report_id_clear(stream, (void *)report);
> oa_timestamp_clear(stream, (void *)report);
> } else {
>- u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
>+ u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
> u32 part = oa_buf_end - report;
>
> /* Zero out the entire report */
>@@ -369,7 +378,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
> xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
> gtt_offset & OAG_OAHEADPTR_MASK);
> stream->oa_buffer.head = 0;
>-
> /*
> * PRM says: "This MMIO must be set before the OATAILPTR register and after the
> * OAHEADPTR register. This is to enable proper functionality of the overflow bit".
>@@ -1256,6 +1264,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
> stream->periodic = param->period_exponent > 0;
> stream->period_exponent = param->period_exponent;
>
>+ /*
>+ * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
>+ * of buffer, making the OA buffer effectively a non-power-of-2 size circular
>+ * buffer whose size, circ_size, is a multiple of the report size
>+ */
>+ if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
>+ stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
>+ stream->oa_buffer.circ_size =
>+ XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
>+ else
>+ stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
>+
> if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
> /* If we don't find the context offset, just return error */
> ret = xe_oa_set_ctx_ctrl_offset(stream);
>diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
>index 6984e7d04be5..d8d5c9d8c22e 100644
>--- a/drivers/gpu/drm/xe/xe_oa_types.h
>+++ b/drivers/gpu/drm/xe/xe_oa_types.h
>@@ -163,6 +163,9 @@ struct xe_oa_buffer {
>
> /** @tail: The last verified cached tail where HW has completed writing */
> u32 tail;
>+
>+ /** @circ_size: The effective circular buffer size, for Xe2+ */
>+ u32 circ_size;
You could store the difference here instead.
/** @empty_space: empty space at the end of buffer */
u32 empty_space;
Regards,
Umesh
> };
>
> /**
>--
>2.41.0
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode
2024-03-12 20:14 ` Umesh Nerlige Ramappa
@ 2024-03-12 20:17 ` Umesh Nerlige Ramappa
2024-03-14 0:48 ` Dixit, Ashutosh
1 sibling, 0 replies; 15+ messages in thread
From: Umesh Nerlige Ramappa @ 2024-03-12 20:17 UTC (permalink / raw)
To: Ashutosh Dixit; +Cc: intel-xe
On Tue, Mar 12, 2024 at 01:14:14PM -0700, Umesh Nerlige Ramappa wrote:
>On Mon, Mar 11, 2024 at 08:40:03PM -0700, Ashutosh Dixit wrote:
>>Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
>>no partial reports at the end of buffer, making the OA buffer effectively a
>>non-power-of-2 size circular buffer whose size, circ_size, is a multiple of
>>the report size.
>>
>>Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>>---
>>drivers/gpu/drm/xe/xe_oa.c | 36 +++++++++++++++++++++++++-------
>>drivers/gpu/drm/xe/xe_oa_types.h | 3 +++
>>2 files changed, 31 insertions(+), 8 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
>>index 6f5bbb0787d9..6a0d2e229254 100644
>>--- a/drivers/gpu/drm/xe/xe_oa.c
>>+++ b/drivers/gpu/drm/xe/xe_oa.c
>>@@ -106,7 +106,15 @@ static const struct xe_oa_format oa_formats[] = {
>>
>>static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
>>{
>>- return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
>>+ if (stream->oa_buffer.circ_size == XE_OA_BUFFER_SIZE)
>>+ return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
>>+ else
>>+ return (tail - head) % stream->oa_buffer.circ_size;
>>+}
>
>For ex: consider a 16 MB buffer with a report size of 384 bytes. At
>the end of the buffer, you would have an empty space of 256 bytes (16
>MB % 384)
>
>(For ref: 16 MB = 0x1000000, 384 = 0x180)
>In this case circ_size = 0xFFFF00
>
>Let's say your head is pointing to 0xFFFD80 and tail is pointing to
>0x180 (essentially there is one unread report at the end of the buffer
>and one unread report at the beginning of the buffer).
>
>In this case, (tail - head) % stream->oa_buffer.circ_size, is not
>calculating the correct size. Should be 0x300, but I am not getting
>that. Can you please check/verify?
>
>I am thinking we need something like this (roughly). We don't need the
>mod operation.
>
>static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
>{
> u32 available = (tail - head) & (XE_OA_BUFFER_SIZE - 1);
>
> /* head is always aligned to report size, but tail may not be */
> available = available % format_size;
should be
available -= available % format_size;
>
> return tail >= head ? available : available - empty_space;
>}
>
>>+
>>+static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
>>+{
>>+ return xe_oa_circ_diff(stream, ptr, -n);
>>}
>>
>>static void xe_oa_config_release(struct kref *ref)
>>@@ -280,7 +288,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
>>
>> buf += *offset;
>>
>>- oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
>>+ oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
>> report_size_partial = oa_buf_end - report;
>>
>> if (report_size_partial < report_size) {
>>@@ -306,7 +314,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
>> int report_size = stream->oa_buffer.format->size;
>> u8 *oa_buf_base = stream->oa_buffer.vaddr;
>> u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
>>- u32 mask = (XE_OA_BUFFER_SIZE - 1);
>> size_t start_offset = *offset;
>> unsigned long flags;
>> u32 head, tail;
>>@@ -317,21 +324,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
>> tail = stream->oa_buffer.tail;
>> spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>>
>>- xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE);
>>+ xe_assert(stream->oa->xe,
>>+ head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
>>
>>- for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) {
>>+ for (; xe_oa_circ_diff(stream, tail, head);
>>+ head = xe_oa_circ_incr(stream, head, report_size)) {
>> u8 *report = oa_buf_base + head;
>>
>> ret = xe_oa_append_report(stream, buf, count, offset, report);
>> if (ret)
>> break;
>>
>>- if (is_power_of_2(report_size)) {
>>+ if (!(stream->oa_buffer.circ_size % report_size)) {
>> /* Clear out report id and timestamp to detect unlanded reports */
>> oa_report_id_clear(stream, (void *)report);
>> oa_timestamp_clear(stream, (void *)report);
>> } else {
>>- u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
>>+ u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
>> u32 part = oa_buf_end - report;
>>
>> /* Zero out the entire report */
>>@@ -369,7 +378,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
>> xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
>> gtt_offset & OAG_OAHEADPTR_MASK);
>> stream->oa_buffer.head = 0;
>>-
>> /*
>> * PRM says: "This MMIO must be set before the OATAILPTR register and after the
>> * OAHEADPTR register. This is to enable proper functionality of the overflow bit".
>>@@ -1256,6 +1264,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
>> stream->periodic = param->period_exponent > 0;
>> stream->period_exponent = param->period_exponent;
>>
>>+ /*
>>+ * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
>>+ * of buffer, making the OA buffer effectively a non-power-of-2 size circular
>>+ * buffer whose size, circ_size, is a multiple of the report size
>>+ */
>>+ if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
>>+ stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
>>+ stream->oa_buffer.circ_size =
>>+ XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
>>+ else
>>+ stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
>>+
>> if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
>> /* If we don't find the context offset, just return error */
>> ret = xe_oa_set_ctx_ctrl_offset(stream);
>>diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
>>index 6984e7d04be5..d8d5c9d8c22e 100644
>>--- a/drivers/gpu/drm/xe/xe_oa_types.h
>>+++ b/drivers/gpu/drm/xe/xe_oa_types.h
>>@@ -163,6 +163,9 @@ struct xe_oa_buffer {
>>
>> /** @tail: The last verified cached tail where HW has completed writing */
>> u32 tail;
>>+
>>+ /** @circ_size: The effective circular buffer size, for Xe2+ */
>>+ u32 circ_size;
>
>You could store the difference here instead.
>
> /** @empty_space: empty space at the end of buffer */
> u32 empty_space;
>
>Regards,
>Umesh
>
>>};
>>
>>/**
>>--
>>2.41.0
>>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode
2024-03-12 20:14 ` Umesh Nerlige Ramappa
2024-03-12 20:17 ` Umesh Nerlige Ramappa
@ 2024-03-14 0:48 ` Dixit, Ashutosh
1 sibling, 0 replies; 15+ messages in thread
From: Dixit, Ashutosh @ 2024-03-14 0:48 UTC (permalink / raw)
To: Umesh Nerlige Ramappa; +Cc: intel-xe
On Tue, 12 Mar 2024 13:14:14 -0700, Umesh Nerlige Ramappa wrote:
>
Hi Umesh,
> On Mon, Mar 11, 2024 at 08:40:03PM -0700, Ashutosh Dixit wrote:
> > Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
> > no partial reports at the end of buffer, making the OA buffer effectively a
> > non-power-of-2 size circular buffer whose size, circ_size, is a multiple of
> > the report size.
> >
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_oa.c | 36 +++++++++++++++++++++++++-------
> > drivers/gpu/drm/xe/xe_oa_types.h | 3 +++
> > 2 files changed, 31 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> > index 6f5bbb0787d9..6a0d2e229254 100644
> > --- a/drivers/gpu/drm/xe/xe_oa.c
> > +++ b/drivers/gpu/drm/xe/xe_oa.c
> > @@ -106,7 +106,15 @@ static const struct xe_oa_format oa_formats[] = {
> >
> > static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
> > {
> > - return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
> > + if (stream->oa_buffer.circ_size == XE_OA_BUFFER_SIZE)
> > + return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
> > + else
> > + return (tail - head) % stream->oa_buffer.circ_size;
> > +}
>
> For ex: consider a 16 MB buffer with a report size of 384 bytes. At the end
> of the buffer, you would have an empty space of 256 bytes (16 MB % 384)
>
> (For ref: 16 MB = 0x1000000, 384 = 0x180)
> In this case circ_size = 0xFFFF00
>
> Let's say your head is pointing to 0xFFFD80 and tail is pointing to 0x180
> (essentially there is one unread report at the end of the buffer and one
> unread report at the beginning of the buffer).
>
> In this case, (tail - head) % stream->oa_buffer.circ_size, is not
> calculating the correct size. Should be 0x300, but I am not getting
> that. Can you please check/verify?
>
> I am thinking we need something like this (roughly). We don't need the mod
> operation.
First of all, thank you so much for checking, and catching, this huge
bug. I did some digging into it and it has to do with how the % operator
behaves with negative numbers, as well as with u32 vs. s32 data types. And
yes, we should not use the % operation.
If this bug had got merged, I can only imagine what a nightmare it would be
to find and fix it, so it's great it is caught in the code review.
I missed this completely because IGT tests with Xe2 overrun mode enabled
were passing in spite of this bug! In particular, igt@non-zero-reason would
previously hang: we would not find any reports because we would be
misaligned, due to the empty space at the end of the OA buffer. This test
was also passing in spite of this bug; I am still trying to figure out how
it could pass.
In any case, I have sent out the latest version of just this patch (rather
than the entire series) with this issue hopefully fixed. Please take a
look.
> > diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
> > index 6984e7d04be5..d8d5c9d8c22e 100644
> > --- a/drivers/gpu/drm/xe/xe_oa_types.h
> > +++ b/drivers/gpu/drm/xe/xe_oa_types.h
> > @@ -163,6 +163,9 @@ struct xe_oa_buffer {
> >
> > /** @tail: The last verified cached tail where HW has completed writing */
> > u32 tail;
> > +
> > + /** @circ_size: The effective circular buffer size, for Xe2+ */
> > + u32 circ_size;
>
> You could store the difference here instead.
>
> /** @empty_space: empty space at the end of buffer */
> u32 empty_space;
I have left circ_size here as it was in the previous version, since I think
it better indicates that we still have a non-power-of-2 sized circular
buffer even in this Xe2+ overrun case.
Thanks.
--
Ashutosh
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v12 00/17] Add OA functionality to Xe
@ 2024-03-12 3:38 Ashutosh Dixit
2024-03-12 3:59 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
0 siblings, 1 reply; 15+ messages in thread
From: Ashutosh Dixit @ 2024-03-12 3:38 UTC (permalink / raw)
To: intel-xe
Please see cover letter for v7 here:
https://patchwork.freedesktop.org/series/121084/#rev7
For changes in v8 through v10, see:
https://patchwork.freedesktop.org/series/128993/
For changes in v11, see:
https://patchwork.freedesktop.org/series/130705/
This series is also available at:
https://gitlab.freedesktop.org/adixit/kernel/-/tree/xe-oa
The series has been tested against this IGT series:
https://gitlab.freedesktop.org/adixit/igt-gpu-tools/-/tree/xe-oa, or,
https://patchwork.freedesktop.org/series/130033/
v2: Fix build
v3: Rebase, due to s/xe_engine/xe_exec_queue/
v4: Re-run for testing
v5: Address review comments, new patches 11 through 17
v6: New patches 18 through 21
v7: Patches are completely redone and don't start with i915 version of the uapi
v8: See https://patchwork.freedesktop.org/patch/575214/?series=128993&rev=1
v9: See https://patchwork.freedesktop.org/patch/577441/?series=128993&rev=2
v10: See https://patchwork.freedesktop.org/patch/577943/?series=128993&rev=3
v11: See https://patchwork.freedesktop.org/patch/581239/?series=130705&rev=1
v12: Add last two new patches to enable Xe2+ overrun mode
Ashutosh Dixit (17):
drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream
types
drm/xe/perf/uapi: Add perf_stream_paranoid sysctl
drm/xe/oa/uapi: Add OA data formats
drm/xe/oa/uapi: Initialize OA units
drm/xe/oa/uapi: Add/remove OA config perf ops
drm/xe/oa/uapi: Define and parse OA stream properties
drm/xe/oa: OA stream initialization (OAG)
drm/xe/oa/uapi: Expose OA stream fd
drm/xe/oa/uapi: Read file_operation
drm/xe/oa: Add OAR support
drm/xe/oa: Add OAC support
drm/xe/oa/uapi: Query OA unit properties
drm/xe/oa/uapi: OA buffer mmap
drm/xe/oa: Add MMIO trigger support
drm/xe/oa: Override GuC RC with OA on PVC
drm/xe/oa: Changes to OA_TAKEN
drm/xe/oa: Enable Xe2+ overrun mode
drivers/gpu/drm/xe/Makefile | 2 +
.../gpu/drm/xe/instructions/xe_mi_commands.h | 3 +
drivers/gpu/drm/xe/regs/xe_engine_regs.h | 4 +-
drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +
drivers/gpu/drm/xe/regs/xe_oa_regs.h | 99 +
drivers/gpu/drm/xe/xe_device.c | 18 +-
drivers/gpu/drm/xe/xe_device_types.h | 4 +
drivers/gpu/drm/xe/xe_gt_types.h | 4 +
drivers/gpu/drm/xe/xe_guc_pc.c | 56 +
drivers/gpu/drm/xe/xe_guc_pc.h | 3 +
drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 +
drivers/gpu/drm/xe/xe_lrc.c | 11 +-
drivers/gpu/drm/xe/xe_lrc.h | 1 +
drivers/gpu/drm/xe/xe_module.c | 6 +
drivers/gpu/drm/xe/xe_oa.c | 2335 +++++++++++++++++
drivers/gpu/drm/xe/xe_oa.h | 30 +
drivers/gpu/drm/xe/xe_oa_types.h | 229 ++
drivers/gpu/drm/xe/xe_perf.c | 67 +
drivers/gpu/drm/xe/xe_perf.h | 20 +
drivers/gpu/drm/xe/xe_query.c | 77 +
drivers/gpu/drm/xe/xe_reg_whitelist.c | 24 +-
include/uapi/drm/xe_drm.h | 286 ++
22 files changed, 3276 insertions(+), 8 deletions(-)
create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h
create mode 100644 drivers/gpu/drm/xe/xe_oa.c
create mode 100644 drivers/gpu/drm/xe/xe_oa.h
create mode 100644 drivers/gpu/drm/xe/xe_oa_types.h
create mode 100644 drivers/gpu/drm/xe/xe_perf.c
create mode 100644 drivers/gpu/drm/xe/xe_perf.h
--
2.41.0
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode
2024-03-12 3:38 [PATCH v12 00/17] Add OA functionality to Xe Ashutosh Dixit
@ 2024-03-12 3:59 ` Ashutosh Dixit
0 siblings, 0 replies; 15+ messages in thread
From: Ashutosh Dixit @ 2024-03-12 3:59 UTC (permalink / raw)
To: intel-xe
Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
no partial reports at the end of buffer, making the OA buffer effectively a
non-power-of-2 size circular buffer whose size, circ_size, is a multiple of
the report size.
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/xe/xe_oa.c | 36 +++++++++++++++++++++++++-------
drivers/gpu/drm/xe/xe_oa_types.h | 3 +++
2 files changed, 31 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6f5bbb0787d9..6a0d2e229254 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -106,7 +106,15 @@ static const struct xe_oa_format oa_formats[] = {
static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
{
- return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
+ if (stream->oa_buffer.circ_size == XE_OA_BUFFER_SIZE)
+ return (tail - head) & (XE_OA_BUFFER_SIZE - 1);
+ else
+ return (tail - head) % stream->oa_buffer.circ_size;
+}
+
+static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
+{
+ return xe_oa_circ_diff(stream, ptr, -n);
}
static void xe_oa_config_release(struct kref *ref)
@@ -280,7 +288,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
buf += *offset;
- oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
report_size_partial = oa_buf_end - report;
if (report_size_partial < report_size) {
@@ -306,7 +314,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
- u32 mask = (XE_OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
u32 head, tail;
@@ -317,21 +324,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
tail = stream->oa_buffer.tail;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
- xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE);
+ xe_assert(stream->oa->xe,
+ head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
- for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) {
+ for (; xe_oa_circ_diff(stream, tail, head);
+ head = xe_oa_circ_incr(stream, head, report_size)) {
u8 *report = oa_buf_base + head;
ret = xe_oa_append_report(stream, buf, count, offset, report);
if (ret)
break;
- if (is_power_of_2(report_size)) {
+ if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */
oa_report_id_clear(stream, (void *)report);
oa_timestamp_clear(stream, (void *)report);
} else {
- u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE;
+ u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
u32 part = oa_buf_end - report;
/* Zero out the entire report */
@@ -369,7 +378,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0;
-
/*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit".
@@ -1256,6 +1264,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->periodic = param->period_exponent > 0;
stream->period_exponent = param->period_exponent;
+ /*
+ * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
+ * of buffer, making the OA buffer effectively a non-power-of-2 size circular
+ * buffer whose size, circ_size, is a multiple of the report size
+ */
+ if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
+ stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
+ stream->oa_buffer.circ_size =
+ XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
+ else
+ stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
+
if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
/* If we don't find the context offset, just return error */
ret = xe_oa_set_ctx_ctrl_offset(stream);
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 6984e7d04be5..d8d5c9d8c22e 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -163,6 +163,9 @@ struct xe_oa_buffer {
/** @tail: The last verified cached tail where HW has completed writing */
u32 tail;
+
+ /** @circ_size: The effective circular buffer size, for Xe2+ */
+ u32 circ_size;
};
/**
--
2.41.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
end of thread, other threads:[~2024-06-18 1:46 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2024-03-14 0:47 [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
-- strict thread matches above, loose matches on Subject: below --
2024-06-18 1:45 [PATCH v19 00/17] Add OA functionality to Xe Ashutosh Dixit
2024-06-18 1:46 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
2024-06-17 22:36 [PATCH v18 00/17] Add OA functionality to Xe Ashutosh Dixit
2024-06-17 22:36 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
2024-06-12 2:05 [PATCH v17 00/17] Add OA functionality to Xe Ashutosh Dixit
2024-06-12 2:05 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
2024-06-07 20:43 [PATCH v16 00/17] Add OA functionality to Xe Ashutosh Dixit
2024-06-07 20:43 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
2024-05-27 1:43 [PATCH v15 00/17] Add OA functionality to Xe Ashutosh Dixit
2024-05-27 1:43 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
2024-05-24 19:01 [PATCH v14 00/17] Add OA functionality to Xe Ashutosh Dixit
2024-05-24 19:01 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
2024-03-15 1:35 [PATCH 00/17] Add OA functionality to Xe Ashutosh Dixit
2024-03-15 1:35 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
2024-03-14 3:25 Ashutosh Dixit
2024-03-15 0:27 ` Umesh Nerlige Ramappa
2024-03-12 3:40 [PATCH 16/17] drm/xe/oa: Changes to OA_TAKEN Ashutosh Dixit
2024-03-12 3:40 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
2024-03-12 20:14 ` Umesh Nerlige Ramappa
2024-03-12 20:17 ` Umesh Nerlige Ramappa
2024-03-14 0:48 ` Dixit, Ashutosh
2024-03-12 3:38 [PATCH v12 00/17] Add OA functionality to Xe Ashutosh Dixit
2024-03-12 3:59 ` [PATCH 17/17] drm/xe/oa: Enable Xe2+ overrun mode Ashutosh Dixit
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox