* [PATCH v4 1/8] drm/xe: Move user engine class mappings to functions
2023-12-22 7:45 [PATCH v4 0/8] Engine busyness Riana Tauro
@ 2023-12-22 7:45 ` Riana Tauro
2024-01-04 5:04 ` Aravind Iddamsetty
2023-12-22 7:45 ` [PATCH v4 2/8] drm/xe/guc: Add interface for engine busyness ticks Riana Tauro
` (6 subsequent siblings)
7 siblings, 1 reply; 23+ messages in thread
From: Riana Tauro @ 2023-12-22 7:45 UTC (permalink / raw)
To: intel-xe
Move user engine class <-> hw engine class to function
calls so that it can be used in different files.
No functional changes.
v2: change array to function
v3: rebase
add header xe_drm
Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Riana Tauro <riana.tauro@intel.com>
---
drivers/gpu/drm/xe/xe_exec_queue.c | 19 ++---------
drivers/gpu/drm/xe/xe_hw_engine.c | 51 ++++++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_hw_engine.h | 3 ++
drivers/gpu/drm/xe/xe_query.c | 23 ++------------
4 files changed, 58 insertions(+), 38 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 44fe8097b7cd..366f5714c6f7 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -482,31 +482,16 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue
return 0;
}
-static const enum xe_engine_class user_to_xe_engine_class[] = {
- [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
- [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
- [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
- [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
- [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
-};
-
static struct xe_hw_engine *
find_hw_engine(struct xe_device *xe,
struct drm_xe_engine_class_instance eci)
{
- u32 idx;
-
- if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class))
- return NULL;
if (eci.gt_id >= xe->info.gt_count)
return NULL;
- idx = array_index_nospec(eci.engine_class,
- ARRAY_SIZE(user_to_xe_engine_class));
-
return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
- user_to_xe_engine_class[idx],
+ xe_hw_engine_from_user_class(eci.engine_class),
eci.engine_instance, true);
}
@@ -532,7 +517,7 @@ static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
continue;
if (hwe->class ==
- user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY])
+ xe_hw_engine_from_user_class(DRM_XE_ENGINE_CLASS_COPY))
logical_mask |= BIT(hwe->logical_instance);
}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 832989c83a25..ab78019b44ff 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -6,6 +6,7 @@
#include "xe_hw_engine.h"
#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
@@ -290,6 +291,56 @@ static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
return xe_mmio_read32(hwe->gt, reg);
}
+/**
+ * xe_hw_engine_to_user_class - converts xe hw engine to user engine class
+ * @engine_class: hw engine class
+ *
+ * Returns: user engine class on success, -1 on error
+ */
+u16 xe_hw_engine_to_user_class(enum xe_engine_class engine_class)
+{
+ switch (engine_class) {
+ case XE_ENGINE_CLASS_RENDER:
+ return DRM_XE_ENGINE_CLASS_RENDER;
+ case XE_ENGINE_CLASS_COPY:
+ return DRM_XE_ENGINE_CLASS_COPY;
+ case XE_ENGINE_CLASS_VIDEO_DECODE:
+ return DRM_XE_ENGINE_CLASS_VIDEO_DECODE;
+ case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+ return DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE;
+ case XE_ENGINE_CLASS_COMPUTE:
+ return DRM_XE_ENGINE_CLASS_COMPUTE;
+ default:
+ XE_WARN_ON(engine_class);
+ return -1;
+ }
+}
+
+/**
+ * xe_hw_engine_from_user_class - converts xe user engine class to hw engine class
+ * @engine_class: user engine class
+ *
+ * Returns: hw engine class on success
+ */
+enum xe_engine_class xe_hw_engine_from_user_class(u16 engine_class)
+{
+ switch (engine_class) {
+ case DRM_XE_ENGINE_CLASS_RENDER:
+ return XE_ENGINE_CLASS_RENDER;
+ case DRM_XE_ENGINE_CLASS_COPY:
+ return XE_ENGINE_CLASS_COPY;
+ case DRM_XE_ENGINE_CLASS_VIDEO_DECODE:
+ return XE_ENGINE_CLASS_VIDEO_DECODE;
+ case DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE:
+ return XE_ENGINE_CLASS_VIDEO_ENHANCE;
+ case DRM_XE_ENGINE_CLASS_COMPUTE:
+ return XE_ENGINE_CLASS_COMPUTE;
+ default:
+ XE_WARN_ON(engine_class);
+ return XE_ENGINE_CLASS_MAX;
+ }
+}
+
void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
{
u32 ccs_mask =
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
index 71968ee2f600..89ca96063644 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine.h
@@ -62,6 +62,9 @@ void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p);
void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe);
bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe);
+enum xe_engine_class xe_hw_engine_from_user_class(u16 engine_class);
+u16 xe_hw_engine_to_user_class(enum xe_engine_class engine_class);
+
static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe)
{
return hwe->name;
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 9b35673b286c..d4793e79e283 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -22,22 +22,6 @@
#include "xe_mmio.h"
#include "xe_ttm_vram_mgr.h"
-static const u16 xe_to_user_engine_class[] = {
- [XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
- [XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY,
- [XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
- [XE_ENGINE_CLASS_VIDEO_ENHANCE] = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE,
- [XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE,
-};
-
-static const enum xe_engine_class user_to_xe_engine_class[] = {
- [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
- [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
- [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
- [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
- [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
-};
-
static size_t calc_hw_engine_info_size(struct xe_device *xe)
{
struct xe_hw_engine *hwe;
@@ -139,10 +123,7 @@ query_engine_cycles(struct xe_device *xe,
if (!gt)
return -EINVAL;
- if (eci->engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
- return -EINVAL;
-
- hwe = xe_gt_hw_engine(gt, user_to_xe_engine_class[eci->engine_class],
+ hwe = xe_gt_hw_engine(gt, xe_hw_engine_from_user_class(eci->engine_class),
eci->engine_instance, true);
if (!hwe)
return -EINVAL;
@@ -208,7 +189,7 @@ static int query_engines(struct xe_device *xe,
continue;
engines->engines[i].instance.engine_class =
- xe_to_user_engine_class[hwe->class];
+ xe_hw_engine_to_user_class(hwe->class);
engines->engines[i].instance.engine_instance =
hwe->logical_instance;
engines->engines[i].instance.gt_id = gt->info.id;
--
2.40.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH v4 1/8] drm/xe: Move user engine class mappings to functions
2023-12-22 7:45 ` [PATCH v4 1/8] drm/xe: Move user engine class mappings to functions Riana Tauro
@ 2024-01-04 5:04 ` Aravind Iddamsetty
2024-01-05 5:31 ` Riana Tauro
0 siblings, 1 reply; 23+ messages in thread
From: Aravind Iddamsetty @ 2024-01-04 5:04 UTC (permalink / raw)
To: Riana Tauro, intel-xe
[-- Attachment #1: Type: text/plain, Size: 7418 bytes --]
On 12/22/23 13:15, Riana Tauro wrote:
> Move user engine class <-> hw engine class to function
> calls so that it can be used in different files.
>
> No functional changes.
>
> v2: change array to function
> v3: rebase
> add header xe_drm
>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Signed-off-by: Riana Tauro <riana.tauro@intel.com>
> ---
> drivers/gpu/drm/xe/xe_exec_queue.c | 19 ++---------
> drivers/gpu/drm/xe/xe_hw_engine.c | 51 ++++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_hw_engine.h | 3 ++
> drivers/gpu/drm/xe/xe_query.c | 23 ++------------
> 4 files changed, 58 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 44fe8097b7cd..366f5714c6f7 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -482,31 +482,16 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue
> return 0;
> }
>
> -static const enum xe_engine_class user_to_xe_engine_class[] = {
> - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
> - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
> - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
> - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
> - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
> -};
> -
> static struct xe_hw_engine *
> find_hw_engine(struct xe_device *xe,
> struct drm_xe_engine_class_instance eci)
> {
> - u32 idx;
> -
> - if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class))
> - return NULL;
>
> if (eci.gt_id >= xe->info.gt_count)
> return NULL;
>
> - idx = array_index_nospec(eci.engine_class,
> - ARRAY_SIZE(user_to_xe_engine_class));
> -
> return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
> - user_to_xe_engine_class[idx],
> + xe_hw_engine_from_user_class(eci.engine_class),
> eci.engine_instance, true);
in xe_gt_hw_engine introduce a check at the beginning for hw_engine_class and return NULL if it
is XE_ENGINE_CLASS_MAX, so that we can skip for_each_hw_engine.
> }
>
> @@ -532,7 +517,7 @@ static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
> continue;
>
> if (hwe->class ==
> - user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY])
> + xe_hw_engine_from_user_class(DRM_XE_ENGINE_CLASS_COPY))
> logical_mask |= BIT(hwe->logical_instance);
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
> index 832989c83a25..ab78019b44ff 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> @@ -6,6 +6,7 @@
> #include "xe_hw_engine.h"
>
> #include <drm/drm_managed.h>
> +#include <drm/xe_drm.h>
>
> #include "regs/xe_engine_regs.h"
> #include "regs/xe_gt_regs.h"
> @@ -290,6 +291,56 @@ static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
> return xe_mmio_read32(hwe->gt, reg);
> }
>
> +/**
> + * xe_hw_engine_to_user_class - converts xe hw engine to user engine class
> + * @engine_class: hw engine class
> + *
> + * Returns: user engine class on success, -1 on error
> + */
> +u16 xe_hw_engine_to_user_class(enum xe_engine_class engine_class)
> +{
> + switch (engine_class) {
> + case XE_ENGINE_CLASS_RENDER:
> + return DRM_XE_ENGINE_CLASS_RENDER;
> + case XE_ENGINE_CLASS_COPY:
> + return DRM_XE_ENGINE_CLASS_COPY;
> + case XE_ENGINE_CLASS_VIDEO_DECODE:
> + return DRM_XE_ENGINE_CLASS_VIDEO_DECODE;
> + case XE_ENGINE_CLASS_VIDEO_ENHANCE:
> + return DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE;
> + case XE_ENGINE_CLASS_COMPUTE:
> + return DRM_XE_ENGINE_CLASS_COMPUTE;
> + default:
> + XE_WARN_ON(engine_class);
> + return -1;
> + }
> +}
> +
> +/**
> + * xe_hw_engine_from_user_class - converts xe user engine class to hw engine class
> + * @engine_class: user engine class
> + *
> + * Returns: hw engine class on success
> + */
> +enum xe_engine_class xe_hw_engine_from_user_class(u16 engine_class)
> +{
> + switch (engine_class) {
> + case DRM_XE_ENGINE_CLASS_RENDER:
> + return XE_ENGINE_CLASS_RENDER;
> + case DRM_XE_ENGINE_CLASS_COPY:
> + return XE_ENGINE_CLASS_COPY;
> + case DRM_XE_ENGINE_CLASS_VIDEO_DECODE:
> + return XE_ENGINE_CLASS_VIDEO_DECODE;
> + case DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE:
> + return XE_ENGINE_CLASS_VIDEO_ENHANCE;
> + case DRM_XE_ENGINE_CLASS_COMPUTE:
> + return XE_ENGINE_CLASS_COMPUTE;
> + default:
> + XE_WARN_ON(engine_class);
> + return XE_ENGINE_CLASS_MAX;
> + }
> +}
> +
> void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
> {
> u32 ccs_mask =
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
> index 71968ee2f600..89ca96063644 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.h
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.h
> @@ -62,6 +62,9 @@ void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p);
> void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe);
>
> bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe);
> +enum xe_engine_class xe_hw_engine_from_user_class(u16 engine_class);
> +u16 xe_hw_engine_to_user_class(enum xe_engine_class engine_class);
> +
> static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe)
> {
> return hwe->name;
> diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
> index 9b35673b286c..d4793e79e283 100644
> --- a/drivers/gpu/drm/xe/xe_query.c
> +++ b/drivers/gpu/drm/xe/xe_query.c
> @@ -22,22 +22,6 @@
> #include "xe_mmio.h"
> #include "xe_ttm_vram_mgr.h"
>
> -static const u16 xe_to_user_engine_class[] = {
> - [XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
> - [XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY,
> - [XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
> - [XE_ENGINE_CLASS_VIDEO_ENHANCE] = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE,
> - [XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE,
> -};
> -
> -static const enum xe_engine_class user_to_xe_engine_class[] = {
> - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
> - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
> - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
> - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
> - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
> -};
> -
> static size_t calc_hw_engine_info_size(struct xe_device *xe)
> {
> struct xe_hw_engine *hwe;
> @@ -139,10 +123,7 @@ query_engine_cycles(struct xe_device *xe,
> if (!gt)
> return -EINVAL;
>
> - if (eci->engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
> - return -EINVAL;
> -
> - hwe = xe_gt_hw_engine(gt, user_to_xe_engine_class[eci->engine_class],
> + hwe = xe_gt_hw_engine(gt, xe_hw_engine_from_user_class(eci->engine_class),
> eci->engine_instance, true);
looks like this changed, may be rebase on latest.
> if (!hwe)
> return -EINVAL;
> @@ -208,7 +189,7 @@ static int query_engines(struct xe_device *xe,
> continue;
>
> engines->engines[i].instance.engine_class =
> - xe_to_user_engine_class[hwe->class];
> + xe_hw_engine_to_user_class(hwe->class);
> engines->engines[i].instance.engine_instance =
> hwe->logical_instance;
> engines->engines[i].instance.gt_id = gt->info.id;
with that addressed Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Regards,
Aravind.
[-- Attachment #2: Type: text/html, Size: 8500 bytes --]
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCH v4 1/8] drm/xe: Move user engine class mappings to functions
2024-01-04 5:04 ` Aravind Iddamsetty
@ 2024-01-05 5:31 ` Riana Tauro
0 siblings, 0 replies; 23+ messages in thread
From: Riana Tauro @ 2024-01-05 5:31 UTC (permalink / raw)
To: Aravind Iddamsetty, intel-xe
Hi Aravind
Thanks for the review.
On 1/4/2024 10:34 AM, Aravind Iddamsetty wrote:
>
> On 12/22/23 13:15, Riana Tauro wrote:
>> Move user engine class <-> hw engine class to function
>> calls so that it can be used in different files.
>>
>> No functional changes.
>>
>> v2: change array to function
>> v3: rebase
>> add header xe_drm
>>
>> Cc: Matthew Brost <matthew.brost@intel.com>
>> Signed-off-by: Riana Tauro <riana.tauro@intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_exec_queue.c | 19 ++---------
>> drivers/gpu/drm/xe/xe_hw_engine.c | 51 ++++++++++++++++++++++++++++++
>> drivers/gpu/drm/xe/xe_hw_engine.h | 3 ++
>> drivers/gpu/drm/xe/xe_query.c | 23 ++------------
>> 4 files changed, 58 insertions(+), 38 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
>> index 44fe8097b7cd..366f5714c6f7 100644
>> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
>> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
>> @@ -482,31 +482,16 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue
>> return 0;
>> }
>>
>> -static const enum xe_engine_class user_to_xe_engine_class[] = {
>> - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
>> - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
>> - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
>> - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
>> - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
>> -};
>> -
>> static struct xe_hw_engine *
>> find_hw_engine(struct xe_device *xe,
>> struct drm_xe_engine_class_instance eci)
>> {
>> - u32 idx;
>> -
>> - if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class))
>> - return NULL;
>>
>> if (eci.gt_id >= xe->info.gt_count)
>> return NULL;
>>
>> - idx = array_index_nospec(eci.engine_class,
>> - ARRAY_SIZE(user_to_xe_engine_class));
>> -
>> return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
>> - user_to_xe_engine_class[idx],
>> + xe_hw_engine_from_user_class(eci.engine_class),
>> eci.engine_instance, true);
>
> in xe_gt_hw_engine introduce a check at the beginning for hw_engine_class and return NULL if it
> is XE_ENGINE_CLASS_MAX, so that we can skip for_each_hw_engine.
Sure. Will add this
>> }
>>
>> @@ -532,7 +517,7 @@ static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
>> continue;
>>
>> if (hwe->class ==
>> - user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY])
>> + xe_hw_engine_from_user_class(DRM_XE_ENGINE_CLASS_COPY))
>> logical_mask |= BIT(hwe->logical_instance);
>> }
>>
>> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
>> index 832989c83a25..ab78019b44ff 100644
>> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
>> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
>> @@ -6,6 +6,7 @@
>> #include "xe_hw_engine.h"
>>
>> #include <drm/drm_managed.h>
>> +#include <drm/xe_drm.h>
>>
>> #include "regs/xe_engine_regs.h"
>> #include "regs/xe_gt_regs.h"
>> @@ -290,6 +291,56 @@ static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
>> return xe_mmio_read32(hwe->gt, reg);
>> }
>>
>> +/**
>> + * xe_hw_engine_to_user_class - converts xe hw engine to user engine class
>> + * @engine_class: hw engine class
>> + *
>> + * Returns: user engine class on success, -1 on error
>> + */
>> +u16 xe_hw_engine_to_user_class(enum xe_engine_class engine_class)
>> +{
>> + switch (engine_class) {
>> + case XE_ENGINE_CLASS_RENDER:
>> + return DRM_XE_ENGINE_CLASS_RENDER;
>> + case XE_ENGINE_CLASS_COPY:
>> + return DRM_XE_ENGINE_CLASS_COPY;
>> + case XE_ENGINE_CLASS_VIDEO_DECODE:
>> + return DRM_XE_ENGINE_CLASS_VIDEO_DECODE;
>> + case XE_ENGINE_CLASS_VIDEO_ENHANCE:
>> + return DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE;
>> + case XE_ENGINE_CLASS_COMPUTE:
>> + return DRM_XE_ENGINE_CLASS_COMPUTE;
>> + default:
>> + XE_WARN_ON(engine_class);
>> + return -1;
>> + }
>> +}
>> +
>> +/**
>> + * xe_hw_engine_from_user_class - converts xe user engine class to hw engine class
>> + * @engine_class: user engine class
>> + *
>> + * Returns: hw engine class on success
>> + */
>> +enum xe_engine_class xe_hw_engine_from_user_class(u16 engine_class)
>> +{
>> + switch (engine_class) {
>> + case DRM_XE_ENGINE_CLASS_RENDER:
>> + return XE_ENGINE_CLASS_RENDER;
>> + case DRM_XE_ENGINE_CLASS_COPY:
>> + return XE_ENGINE_CLASS_COPY;
>> + case DRM_XE_ENGINE_CLASS_VIDEO_DECODE:
>> + return XE_ENGINE_CLASS_VIDEO_DECODE;
>> + case DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE:
>> + return XE_ENGINE_CLASS_VIDEO_ENHANCE;
>> + case DRM_XE_ENGINE_CLASS_COMPUTE:
>> + return XE_ENGINE_CLASS_COMPUTE;
>> + default:
>> + XE_WARN_ON(engine_class);
>> + return XE_ENGINE_CLASS_MAX;
>> + }
>> +}
>> +
>> void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
>> {
>> u32 ccs_mask =
>> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
>> index 71968ee2f600..89ca96063644 100644
>> --- a/drivers/gpu/drm/xe/xe_hw_engine.h
>> +++ b/drivers/gpu/drm/xe/xe_hw_engine.h
>> @@ -62,6 +62,9 @@ void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p);
>> void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe);
>>
>> bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe);
>> +enum xe_engine_class xe_hw_engine_from_user_class(u16 engine_class);
>> +u16 xe_hw_engine_to_user_class(enum xe_engine_class engine_class);
>> +
>> static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe)
>> {
>> return hwe->name;
>> diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
>> index 9b35673b286c..d4793e79e283 100644
>> --- a/drivers/gpu/drm/xe/xe_query.c
>> +++ b/drivers/gpu/drm/xe/xe_query.c
>> @@ -22,22 +22,6 @@
>> #include "xe_mmio.h"
>> #include "xe_ttm_vram_mgr.h"
>>
>> -static const u16 xe_to_user_engine_class[] = {
>> - [XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
>> - [XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY,
>> - [XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
>> - [XE_ENGINE_CLASS_VIDEO_ENHANCE] = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE,
>> - [XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE,
>> -};
>> -
>> -static const enum xe_engine_class user_to_xe_engine_class[] = {
>> - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
>> - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
>> - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
>> - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
>> - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
>> -};
>> -
>> static size_t calc_hw_engine_info_size(struct xe_device *xe)
>> {
>> struct xe_hw_engine *hwe;
>> @@ -139,10 +123,7 @@ query_engine_cycles(struct xe_device *xe,
>> if (!gt)
>> return -EINVAL;
>>
>> - if (eci->engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
>> - return -EINVAL;
>> -
>> - hwe = xe_gt_hw_engine(gt, user_to_xe_engine_class[eci->engine_class],
>> + hwe = xe_gt_hw_engine(gt, xe_hw_engine_from_user_class(eci->engine_class),
>> eci->engine_instance, true);
>
> looks like this changed, may be rebase on latest.
query_engine_cycles still has this function. rebased on latest
>> if (!hwe)
>> return -EINVAL;
>> @@ -208,7 +189,7 @@ static int query_engines(struct xe_device *xe,
>> continue;
>>
>> engines->engines[i].instance.engine_class =
>> - xe_to_user_engine_class[hwe->class];
>> + xe_hw_engine_to_user_class(hwe->class);
>> engines->engines[i].instance.engine_instance =
>> hwe->logical_instance;
>> engines->engines[i].instance.gt_id = gt->info.id;
>
> with that addressed Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
>
> Regards,
> Aravind.
Thanks
Riana
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v4 2/8] drm/xe/guc: Add interface for engine busyness ticks
2023-12-22 7:45 [PATCH v4 0/8] Engine busyness Riana Tauro
2023-12-22 7:45 ` [PATCH v4 1/8] drm/xe: Move user engine class mappings to functions Riana Tauro
@ 2023-12-22 7:45 ` Riana Tauro
2023-12-22 7:45 ` [PATCH v4 3/8] drm/xe/uapi: Add configs for Engine busyness Riana Tauro
` (5 subsequent siblings)
7 siblings, 0 replies; 23+ messages in thread
From: Riana Tauro @ 2023-12-22 7:45 UTC (permalink / raw)
To: intel-xe
GuC provides engine busyness ticks as 64 bit counters which count
in clock ticks. These counters are maintained in a
shared memory buffer and updated on a continuous basis.
Add functions that initialize Engine busyness and get
the current accumulated busyness.
v2: rebase
fix naming of local variable (Umesh)
Co-developed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
drivers/gpu/drm/xe/Makefile | 1 +
drivers/gpu/drm/xe/abi/guc_actions_abi.h | 1 +
drivers/gpu/drm/xe/xe_gt.c | 13 ++
drivers/gpu/drm/xe/xe_gt.h | 2 +
drivers/gpu/drm/xe/xe_guc.c | 7 +
drivers/gpu/drm/xe/xe_guc_engine_busyness.c | 153 ++++++++++++++++++++
drivers/gpu/drm/xe/xe_guc_engine_busyness.h | 17 +++
drivers/gpu/drm/xe/xe_guc_fwif.h | 15 ++
drivers/gpu/drm/xe/xe_guc_types.h | 6 +
9 files changed, 215 insertions(+)
create mode 100644 drivers/gpu/drm/xe/xe_guc_engine_busyness.c
create mode 100644 drivers/gpu/drm/xe/xe_guc_engine_busyness.h
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index df8601d6a59f..26988c1c732a 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -95,6 +95,7 @@ xe-y += xe_bb.o \
xe_guc_ct.o \
xe_guc_db_mgr.o \
xe_guc_debugfs.o \
+ xe_guc_engine_busyness.o \
xe_guc_hwconfig.o \
xe_guc_log.o \
xe_guc_pc.o \
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
index 3062e0e0d467..d87681ca89bc 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -139,6 +139,7 @@ enum xe_guc_action {
XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
+ XE_GUC_ACTION_SET_DEVICE_ENGINE_UTILIZATION = 0x550C,
XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000,
XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002,
XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003,
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 3af2adec1295..58daadc00363 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -32,6 +32,7 @@
#include "xe_gt_sysfs.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_gt_topology.h"
+#include "xe_guc_engine_busyness.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_pc.h"
#include "xe_hw_fence.h"
@@ -776,3 +777,15 @@ struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt,
return NULL;
}
+
+/**
+ * xe_gt_engine_busy_ticks - Return current accumulated engine busyness ticks
+ * @gt: GT structure
+ * @hwe: Xe HW engine to report on
+ *
+ * Returns accumulated ticks @hwe was busy since engine stats were enabled.
+ */
+u64 xe_gt_engine_busy_ticks(struct xe_gt *gt, struct xe_hw_engine *hwe)
+{
+ return xe_guc_engine_busyness_ticks(>->uc.guc, hwe);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 4486e083f5ef..4303ec48c404 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -42,6 +42,8 @@ int xe_gt_resume(struct xe_gt *gt);
void xe_gt_reset_async(struct xe_gt *gt);
void xe_gt_sanitize(struct xe_gt *gt);
+u64 xe_gt_engine_busy_ticks(struct xe_gt *gt, struct xe_hw_engine *hwe);
+
/**
* xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
* first that matches the same reset domain as @class
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 811e8b201270..cfd434b26d0d 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -18,6 +18,7 @@
#include "xe_gt.h"
#include "xe_guc_ads.h"
#include "xe_guc_ct.h"
+#include "xe_guc_engine_busyness.h"
#include "xe_guc_hwconfig.h"
#include "xe_guc_log.h"
#include "xe_guc_pc.h"
@@ -302,9 +303,15 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
int xe_guc_post_load_init(struct xe_guc *guc)
{
+ int err;
+
xe_guc_ads_populate_post_load(&guc->ads);
guc->submission_state.enabled = true;
+ err = xe_guc_engine_busyness_init(guc);
+ if (err)
+ return err;
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
new file mode 100644
index 000000000000..af7b6e768751
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+#include "xe_guc_engine_busyness.h"
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "xe_bo.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+
+/**
+ * DOC: Xe GuC Engine Busyness
+ *
+ * GuC >= 70.11.1 maintains busyness counters in a shared memory buffer for each
+ * engine on a continuous basis. The counters are all 64 bits and count in clock
+ * ticks. The values are updated on context switch events and periodicaly on a
+ * timer internal to GuC. The update rate is guaranteed to be at least 2Hz (but with
+ * a caveat that is not real time, best effort only).
+ *
+ * engine busyness ticks (ticks_engine) : clock ticks for which engine was active
+ */
+
+static void guc_engine_busyness_usage_map(struct xe_guc *guc,
+ struct xe_hw_engine *hwe,
+ struct iosys_map *engine_map)
+{
+ struct iosys_map *map;
+ size_t offset;
+ u32 instance;
+ u8 guc_class;
+
+ guc_class = xe_engine_class_to_guc_class(hwe->class);
+ instance = hwe->logical_instance;
+
+ map = &guc->busy.bo->vmap;
+
+ offset = offsetof(struct guc_engine_observation_data,
+ engine_data[guc_class][instance]);
+
+ *engine_map = IOSYS_MAP_INIT_OFFSET(map, offset);
+}
+
+static void guc_engine_busyness_get_usage(struct xe_guc *guc,
+ struct xe_hw_engine *hwe,
+ u64 *ticks_engine)
+{
+ struct iosys_map engine_map;
+ u64 engine_ticks = 0;
+ int i = 0;
+
+ guc_engine_busyness_usage_map(guc, hwe, &engine_map);
+
+#define read_engine_usage(map_, field_) \
+ iosys_map_rd_field(map_, 0, struct guc_engine_data, field_)
+
+ do {
+ engine_ticks = read_engine_usage(&engine_map, total_execution_ticks);
+
+ if (read_engine_usage(&engine_map, total_execution_ticks) == engine_ticks)
+ break;
+ } while (++i < 6);
+
+#undef read_engine_usage
+
+ if (ticks_engine)
+ *ticks_engine = engine_ticks;
+}
+
+static void guc_engine_busyness_enable_stats(struct xe_guc *guc)
+{
+ u32 ggtt_addr = xe_bo_ggtt_addr(guc->busy.bo);
+ u32 action[] = {
+ XE_GUC_ACTION_SET_DEVICE_ENGINE_UTILIZATION,
+ ggtt_addr,
+ 0,
+ };
+ struct xe_device *xe = guc_to_xe(guc);
+ int ret;
+
+ ret = xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
+ if (ret)
+ drm_err(&xe->drm, "Failed to enable usage stats %pe", ERR_PTR(ret));
+}
+
+static void guc_engine_busyness_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_guc *guc = arg;
+
+ xe_bo_unpin_map_no_vm(guc->busy.bo);
+}
+
+/*
+ * xe_guc_engine_busyness_ticks - Gets current accumulated
+ * engine busyness ticks
+ * @guc: The GuC object
+ * @hwe: Xe HW Engine
+ *
+ * Returns current accumulated ticks @hwe was busy when engine stats are enabled.
+ */
+u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
+{
+ u64 ticks_engine;
+
+ guc_engine_busyness_get_usage(guc, hwe, &ticks_engine);
+
+ return ticks_engine;
+}
+
+/*
+ * xe_guc_engine_busyness_init - Initializes the GuC Engine Busyness
+ * @guc: The GuC object
+ *
+ * Initialize GuC engine busyness, only called once during driver load
+ * Supported only on GuC >= 70.11.1
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_engine_busyness_init(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_bo *bo;
+ u32 size;
+ int err;
+
+ /* Initialization already done */
+ if (guc->busy.bo)
+ return 0;
+
+ size = PAGE_ALIGN(sizeof(struct guc_engine_observation_data));
+
+ bo = xe_bo_create_pin_map(xe, tile, NULL, size,
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+ XE_BO_CREATE_GGTT_BIT);
+
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ guc->busy.bo = bo;
+
+ guc_engine_busyness_enable_stats(guc);
+
+ err = drmm_add_action_or_reset(&xe->drm, guc_engine_busyness_fini, guc);
+ if (err)
+ return err;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
new file mode 100644
index 000000000000..d70f06209896
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ENGINE_BUSYNESS_H_
+#define _XE_GUC_ENGINE_BUSYNESS_H_
+
+#include <linux/types.h>
+
+struct xe_hw_engine;
+struct xe_guc;
+
+int xe_guc_engine_busyness_init(struct xe_guc *guc);
+u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 4dd5a88a7826..c8ca5fe97614 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -37,6 +37,7 @@
#define GUC_COMPUTE_CLASS 4
#define GUC_GSC_OTHER_CLASS 5
#define GUC_LAST_ENGINE_CLASS GUC_GSC_OTHER_CLASS
+#define GUC_MAX_OAG_COUNTERS 8
#define GUC_MAX_ENGINE_CLASSES 16
#define GUC_MAX_INSTANCES_PER_CLASS 32
@@ -222,6 +223,20 @@ struct guc_engine_usage {
struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
} __packed;
+/* Engine busyness stats */
+struct guc_engine_data {
+ u64 total_execution_ticks;
+ u64 reserved;
+} __packed;
+
+struct guc_engine_observation_data {
+ struct guc_engine_data engine_data[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+ u64 oag_busy_data[GUC_MAX_OAG_COUNTERS];
+ u64 total_active_ticks;
+ u64 gt_timestamp;
+ u64 reserved1;
+} __packed;
+
/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
enum xe_guc_recv_message {
XE_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index 16de203c62a7..a75728071f46 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -85,6 +85,12 @@ struct xe_guc {
u32 size;
} hwconfig;
+ /** @busy: Engine busyness */
+ struct {
+ /** @bo: GGTT buffer object of engine busyness that is shared with GuC */
+ struct xe_bo *bo;
+ } busy;
+
/**
* @notify_reg: Register which is written to notify GuC of H2G messages
*/
--
2.40.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH v4 3/8] drm/xe/uapi: Add configs for Engine busyness
2023-12-22 7:45 [PATCH v4 0/8] Engine busyness Riana Tauro
2023-12-22 7:45 ` [PATCH v4 1/8] drm/xe: Move user engine class mappings to functions Riana Tauro
2023-12-22 7:45 ` [PATCH v4 2/8] drm/xe/guc: Add interface for engine busyness ticks Riana Tauro
@ 2023-12-22 7:45 ` Riana Tauro
2024-01-03 5:26 ` Aravind Iddamsetty
2023-12-22 7:45 ` [PATCH v4 4/8] drm/xe/pmu: Enable PMU interface and add engine busyness counter Riana Tauro
` (4 subsequent siblings)
7 siblings, 1 reply; 23+ messages in thread
From: Riana Tauro @ 2023-12-22 7:45 UTC (permalink / raw)
To: intel-xe
GuC provides engine busyness ticks as a 64 bit counter which count
as clock ticks.
Add configs to the uapi to expose Engine busyness via PMU.
v2: add "__" prefix for internal helpers
add a simple helper for application usage (Aravind)
v3: rebase
change internal uapi pmu config helpers (Umesh)
Cc: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Signed-off-by: Riana Tauro <riana.tauro@intel.com>
---
include/uapi/drm/xe_drm.h | 41 +++++++++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 9fa3ae324731..f8456cda5cda 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1340,6 +1340,47 @@ struct drm_xe_wait_user_fence {
__u64 reserved[2];
};
+/**
+ * DOC: XE PMU event config IDs
+ *
+ * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed in xe_drm.h
+ * in 'struct perf_event_attr' as part of perf_event_open syscall to read a
+ * particular event.
+ *
+ */
+
+/**
+ * enum drm_xe_pmu_engine_sample - Supported PMU engine samples
+ */
+enum drm_xe_pmu_engine_sample {
+ /** @DRM_XE_PMU_SAMPLE_BUSY_TICKS: Engine busy ticks */
+ DRM_XE_PMU_SAMPLE_BUSY_TICKS = 0,
+};
+
+/*
+ * Top bits of every counter are GT id.
+ */
+#define __DRM_XE_PMU_GT_SHIFT (56)
+#define __DRM_XE_PMU_SAMPLE_BITS (4)
+#define __DRM_XE_PMU_SAMPLE_INSTANCE_BITS (8)
+#define __DRM_XE_PMU_CLASS_SHIFT \
+ (__DRM_XE_PMU_SAMPLE_BITS + __DRM_XE_PMU_SAMPLE_INSTANCE_BITS)
+
+#define __DRM_XE_PMU_GT_EVENT(gt, x) \
+ (((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT))
+
+#define __DRM_XE_PMU_ENGINE(class, instance, sample) \
+ (((class) << __DRM_XE_PMU_CLASS_SHIFT | \
+ (instance) << __DRM_XE_PMU_SAMPLE_BITS | \
+ (sample)))
+
+#define __DRM_XE_PMU_OTHER(gt, x) \
+ (__DRM_XE_PMU_GT_EVENT(gt, 0xfffff) + 1 + (x))
+
+#define DRM_XE_PMU_ENGINE_BUSY_TICKS(gt, class, instance) \
+ __DRM_XE_PMU_GT_EVENT(gt, __DRM_XE_PMU_ENGINE(class, instance, \
+ DRM_XE_PMU_SAMPLE_BUSY_TICKS))
+
#if defined(__cplusplus)
}
#endif
--
2.40.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH v4 3/8] drm/xe/uapi: Add configs for Engine busyness
2023-12-22 7:45 ` [PATCH v4 3/8] drm/xe/uapi: Add configs for Engine busyness Riana Tauro
@ 2024-01-03 5:26 ` Aravind Iddamsetty
2024-01-03 6:40 ` Riana Tauro
0 siblings, 1 reply; 23+ messages in thread
From: Aravind Iddamsetty @ 2024-01-03 5:26 UTC (permalink / raw)
To: Riana Tauro, intel-xe
On 12/22/23 13:15, Riana Tauro wrote:
> GuC provides engine busyness ticks as a 64 bit counter which count
> as clock ticks.
>
> Add configs to the uapi to expose Engine busyness via PMU.
>
> v2: add "__" prefix for internal helpers
> add a simple helper for application usage (Aravind)
>
> v3: rebase
> change internal uapi pmu config helpers (Umesh)
>
> Cc: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
> Signed-off-by: Riana Tauro <riana.tauro@intel.com>
> ---
> include/uapi/drm/xe_drm.h | 41 +++++++++++++++++++++++++++++++++++++++
> 1 file changed, 41 insertions(+)
>
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index 9fa3ae324731..f8456cda5cda 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -1340,6 +1340,47 @@ struct drm_xe_wait_user_fence {
> __u64 reserved[2];
> };
>
> +/**
> + * DOC: XE PMU event config IDs
> + *
> + * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed in xe_drm.h
> + * in 'struct perf_event_attr' as part of perf_event_open syscall to read a
> + * particular event.
> + *
> + */
> +
> +/**
> + * enum drm_xe_pmu_engine_sample - Supported PMU engine samples
> + */
> +enum drm_xe_pmu_engine_sample {
> + /** @DRM_XE_PMU_SAMPLE_BUSY_TICKS: Engine busy ticks */
> + DRM_XE_PMU_SAMPLE_BUSY_TICKS = 0,
> +};
> +
> +/*
> + * Top bits of every counter are GT id.
> + */
> +#define __DRM_XE_PMU_GT_SHIFT (56)
> +#define __DRM_XE_PMU_SAMPLE_BITS (4)
> +#define __DRM_XE_PMU_SAMPLE_INSTANCE_BITS (8)
> +#define __DRM_XE_PMU_CLASS_SHIFT \
> + (__DRM_XE_PMU_SAMPLE_BITS + __DRM_XE_PMU_SAMPLE_INSTANCE_BITS)
> +
> +#define __DRM_XE_PMU_GT_EVENT(gt, x) \
> + (((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT))
> +
> +#define __DRM_XE_PMU_ENGINE(class, instance, sample) \
> + (((class) << __DRM_XE_PMU_CLASS_SHIFT | \
> + (instance) << __DRM_XE_PMU_SAMPLE_BITS | \
> + (sample)))
> +
> +#define __DRM_XE_PMU_OTHER(gt, x) \
> + (__DRM_XE_PMU_GT_EVENT(gt, 0xfffff) + 1 + (x))
Use __DRM_XE_PMU_ENGINE(0xff, 0xff, 0xf) instead of 0xfffff so that it will be clear
that it is starting after the engine event.
But __DRM_XE_PMU_OTHER is not used any where so why to introduce in this patch.
Thanks,
Aravind.
> +
> +#define DRM_XE_PMU_ENGINE_BUSY_TICKS(gt, class, instance) \
> + __DRM_XE_PMU_GT_EVENT(gt, __DRM_XE_PMU_ENGINE(class, instance, \
> + DRM_XE_PMU_SAMPLE_BUSY_TICKS))
> +
> #if defined(__cplusplus)
> }
> #endif
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCH v4 3/8] drm/xe/uapi: Add configs for Engine busyness
2024-01-03 5:26 ` Aravind Iddamsetty
@ 2024-01-03 6:40 ` Riana Tauro
2024-01-03 7:02 ` Aravind Iddamsetty
0 siblings, 1 reply; 23+ messages in thread
From: Riana Tauro @ 2024-01-03 6:40 UTC (permalink / raw)
To: Aravind Iddamsetty, intel-xe
Hi Aravind
On 1/3/2024 10:56 AM, Aravind Iddamsetty wrote:
>
> On 12/22/23 13:15, Riana Tauro wrote:
>> GuC provides engine busyness ticks as a 64 bit counter which count
>> as clock ticks.
>>
>> Add configs to the uapi to expose Engine busyness via PMU.
>>
>> v2: add "__" prefix for internal helpers
>> add a simple helper for application usage (Aravind)
>>
>> v3: rebase
>> change internal uapi pmu config helpers (Umesh)
>>
>> Cc: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
>> Signed-off-by: Riana Tauro <riana.tauro@intel.com>
>> ---
>> include/uapi/drm/xe_drm.h | 41 +++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 41 insertions(+)
>>
>> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>> index 9fa3ae324731..f8456cda5cda 100644
>> --- a/include/uapi/drm/xe_drm.h
>> +++ b/include/uapi/drm/xe_drm.h
>> @@ -1340,6 +1340,47 @@ struct drm_xe_wait_user_fence {
>> __u64 reserved[2];
>> };
>>
>> +/**
>> + * DOC: XE PMU event config IDs
>> + *
>> + * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed in xe_drm.h
>> + * in 'struct perf_event_attr' as part of perf_event_open syscall to read a
>> + * particular event.
>> + *
>> + */
>> +
>> +/**
>> + * enum drm_xe_pmu_engine_sample - Supported PMU engine samples
>> + */
>> +enum drm_xe_pmu_engine_sample {
>> + /** @DRM_XE_PMU_SAMPLE_BUSY_TICKS: Engine busy ticks */
>> + DRM_XE_PMU_SAMPLE_BUSY_TICKS = 0,
>> +};
>> +
>> +/*
>> + * Top bits of every counter are GT id.
>> + */
>> +#define __DRM_XE_PMU_GT_SHIFT (56)
>> +#define __DRM_XE_PMU_SAMPLE_BITS (4)
>> +#define __DRM_XE_PMU_SAMPLE_INSTANCE_BITS (8)
>> +#define __DRM_XE_PMU_CLASS_SHIFT \
>> + (__DRM_XE_PMU_SAMPLE_BITS + __DRM_XE_PMU_SAMPLE_INSTANCE_BITS)
>> +
>> +#define __DRM_XE_PMU_GT_EVENT(gt, x) \
>> + (((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT))
>> +
>> +#define __DRM_XE_PMU_ENGINE(class, instance, sample) \
>> + (((class) << __DRM_XE_PMU_CLASS_SHIFT | \
>> + (instance) << __DRM_XE_PMU_SAMPLE_BITS | \
>> + (sample)))
>> +
>> +#define __DRM_XE_PMU_OTHER(gt, x) \
>> + (__DRM_XE_PMU_GT_EVENT(gt, 0xfffff) + 1 + (x))
> Use __DRM_XE_PMU_ENGINE(0xff, 0xff, 0xf) instead of 0xfffff so that it will be clear
> that it is starting after the engine event.
Previous comments from Umesh suggested to use 0xfffff so changed it from
rev3.
>
> But __DRM_XE_PMU_OTHER is not used any where so why to introduce in this patch.
I added it here so that it can be used in the next patch to check if its
engine event.
Will move total active ticks config also to the same patch so that all
configs are in one patch.
Thanks
Riana
>
> Thanks,
> Aravind.
>> +
>> +#define DRM_XE_PMU_ENGINE_BUSY_TICKS(gt, class, instance) \
>> + __DRM_XE_PMU_GT_EVENT(gt, __DRM_XE_PMU_ENGINE(class, instance, \
>> + DRM_XE_PMU_SAMPLE_BUSY_TICKS))
>> +
>> #if defined(__cplusplus)
>> }
>> #endif
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCH v4 3/8] drm/xe/uapi: Add configs for Engine busyness
2024-01-03 6:40 ` Riana Tauro
@ 2024-01-03 7:02 ` Aravind Iddamsetty
2024-01-03 7:06 ` Riana Tauro
0 siblings, 1 reply; 23+ messages in thread
From: Aravind Iddamsetty @ 2024-01-03 7:02 UTC (permalink / raw)
To: Riana Tauro, intel-xe
On 1/3/24 12:10, Riana Tauro wrote:
>
> Hi Aravind
>
> On 1/3/2024 10:56 AM, Aravind Iddamsetty wrote:
>>
>> On 12/22/23 13:15, Riana Tauro wrote:
>>> GuC provides engine busyness ticks as a 64 bit counter which count
>>> as clock ticks.
>>>
>>> Add configs to the uapi to expose Engine busyness via PMU.
>>>
>>> v2: add "__" prefix for internal helpers
>>> add a simple helper for application usage (Aravind)
>>>
>>> v3: rebase
>>> change internal uapi pmu config helpers (Umesh)
>>>
>>> Cc: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
>>> Signed-off-by: Riana Tauro <riana.tauro@intel.com>
>>> ---
>>> include/uapi/drm/xe_drm.h | 41 +++++++++++++++++++++++++++++++++++++++
>>> 1 file changed, 41 insertions(+)
>>>
>>> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>>> index 9fa3ae324731..f8456cda5cda 100644
>>> --- a/include/uapi/drm/xe_drm.h
>>> +++ b/include/uapi/drm/xe_drm.h
>>> @@ -1340,6 +1340,47 @@ struct drm_xe_wait_user_fence {
>>> __u64 reserved[2];
>>> };
>>> +/**
>>> + * DOC: XE PMU event config IDs
>>> + *
>>> + * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed in xe_drm.h
>>> + * in 'struct perf_event_attr' as part of perf_event_open syscall to read a
>>> + * particular event.
>>> + *
>>> + */
>>> +
>>> +/**
>>> + * enum drm_xe_pmu_engine_sample - Supported PMU engine samples
>>> + */
>>> +enum drm_xe_pmu_engine_sample {
>>> + /** @DRM_XE_PMU_SAMPLE_BUSY_TICKS: Engine busy ticks */
>>> + DRM_XE_PMU_SAMPLE_BUSY_TICKS = 0,
>>> +};
>>> +
>>> +/*
>>> + * Top bits of every counter are GT id.
>>> + */
>>> +#define __DRM_XE_PMU_GT_SHIFT (56)
>>> +#define __DRM_XE_PMU_SAMPLE_BITS (4)
>>> +#define __DRM_XE_PMU_SAMPLE_INSTANCE_BITS (8)
>>> +#define __DRM_XE_PMU_CLASS_SHIFT \
>>> + (__DRM_XE_PMU_SAMPLE_BITS + __DRM_XE_PMU_SAMPLE_INSTANCE_BITS)
>>> +
>>> +#define __DRM_XE_PMU_GT_EVENT(gt, x) \
>>> + (((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT))
>>> +
>>> +#define __DRM_XE_PMU_ENGINE(class, instance, sample) \
>>> + (((class) << __DRM_XE_PMU_CLASS_SHIFT | \
>>> + (instance) << __DRM_XE_PMU_SAMPLE_BITS | \
>>> + (sample)))
>>> +
>>> +#define __DRM_XE_PMU_OTHER(gt, x) \
>>> + (__DRM_XE_PMU_GT_EVENT(gt, 0xfffff) + 1 + (x))
>> Use __DRM_XE_PMU_ENGINE(0xff, 0xff, 0xf) instead of 0xfffff so that it will be clear
>> that it is starting after the engine event.
> Previous comments from Umesh suggested to use 0xfffff so changed it from rev3.
looking back at the comment, Umesh corrected it to be 0xfffff, so using as suggested by me above
looks to be more apt.
>>
>> But __DRM_XE_PMU_OTHER is not used any where so why to introduce in this patch.
> I added it here so that it can be used in the next patch to check if its engine event.
>
> Will move total active ticks config also to the same patch so that all configs are in one patch.
then you should define __DRM_XE_PMU_OTHER where active ticks is being introduced, not vice versa.
Thanks,
Aravind.
>
> Thanks
> Riana
>
>
>>
>> Thanks,
>> Aravind.
>>> +
>>> +#define DRM_XE_PMU_ENGINE_BUSY_TICKS(gt, class, instance) \
>>> + __DRM_XE_PMU_GT_EVENT(gt, __DRM_XE_PMU_ENGINE(class, instance, \
>>> + DRM_XE_PMU_SAMPLE_BUSY_TICKS))
>>> +
>>> #if defined(__cplusplus)
>>> }
>>> #endif
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCH v4 3/8] drm/xe/uapi: Add configs for Engine busyness
2024-01-03 7:02 ` Aravind Iddamsetty
@ 2024-01-03 7:06 ` Riana Tauro
2024-01-03 7:08 ` Riana Tauro
0 siblings, 1 reply; 23+ messages in thread
From: Riana Tauro @ 2024-01-03 7:06 UTC (permalink / raw)
To: Aravind Iddamsetty, intel-xe
On 1/3/2024 12:32 PM, Aravind Iddamsetty wrote:
>
> On 1/3/24 12:10, Riana Tauro wrote:
>>
>> Hi Aravind
>>
>> On 1/3/2024 10:56 AM, Aravind Iddamsetty wrote:
>>>
>>> On 12/22/23 13:15, Riana Tauro wrote:
>>>> GuC provides engine busyness ticks as a 64 bit counter which count
>>>> as clock ticks.
>>>>
>>>> Add configs to the uapi to expose Engine busyness via PMU.
>>>>
>>>> v2: add "__" prefix for internal helpers
>>>> add a simple helper for application usage (Aravind)
>>>>
>>>> v3: rebase
>>>> change internal uapi pmu config helpers (Umesh)
>>>>
>>>> Cc: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
>>>> Signed-off-by: Riana Tauro <riana.tauro@intel.com>
>>>> ---
>>>> include/uapi/drm/xe_drm.h | 41 +++++++++++++++++++++++++++++++++++++++
>>>> 1 file changed, 41 insertions(+)
>>>>
>>>> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>>>> index 9fa3ae324731..f8456cda5cda 100644
>>>> --- a/include/uapi/drm/xe_drm.h
>>>> +++ b/include/uapi/drm/xe_drm.h
>>>> @@ -1340,6 +1340,47 @@ struct drm_xe_wait_user_fence {
>>>> __u64 reserved[2];
>>>> };
>>>> +/**
>>>> + * DOC: XE PMU event config IDs
>>>> + *
>>>> + * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed in xe_drm.h
>>>> + * in 'struct perf_event_attr' as part of perf_event_open syscall to read a
>>>> + * particular event.
>>>> + *
>>>> + */
>>>> +
>>>> +/**
>>>> + * enum drm_xe_pmu_engine_sample - Supported PMU engine samples
>>>> + */
>>>> +enum drm_xe_pmu_engine_sample {
>>>> + /** @DRM_XE_PMU_SAMPLE_BUSY_TICKS: Engine busy ticks */
>>>> + DRM_XE_PMU_SAMPLE_BUSY_TICKS = 0,
>>>> +};
>>>> +
>>>> +/*
>>>> + * Top bits of every counter are GT id.
>>>> + */
>>>> +#define __DRM_XE_PMU_GT_SHIFT (56)
>>>> +#define __DRM_XE_PMU_SAMPLE_BITS (4)
>>>> +#define __DRM_XE_PMU_SAMPLE_INSTANCE_BITS (8)
>>>> +#define __DRM_XE_PMU_CLASS_SHIFT \
>>>> + (__DRM_XE_PMU_SAMPLE_BITS + __DRM_XE_PMU_SAMPLE_INSTANCE_BITS)
>>>> +
>>>> +#define __DRM_XE_PMU_GT_EVENT(gt, x) \
>>>> + (((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT))
>>>> +
>>>> +#define __DRM_XE_PMU_ENGINE(class, instance, sample) \
>>>> + (((class) << __DRM_XE_PMU_CLASS_SHIFT | \
>>>> + (instance) << __DRM_XE_PMU_SAMPLE_BITS | \
>>>> + (sample)))
>>>> +
>>>> +#define __DRM_XE_PMU_OTHER(gt, x) \
>>>> + (__DRM_XE_PMU_GT_EVENT(gt, 0xfffff) + 1 + (x))
>>> Use __DRM_XE_PMU_ENGINE(0xff, 0xff, 0xf) instead of 0xfffff so that it will be clear
>>> that it is starting after the engine event.
>> Previous comments from Umesh suggested to use 0xfffff so changed it from rev3.
> looking back at the comment, Umesh corrected it to be 0xfffff, so using as suggested by me above
> looks to be more apt.
Initially Group busyness took the first few configs, so i had added a
different offset.
Anyway ___DRM_XE_PMU_GT_EVENT(gt, 0xfffff) is same as
__DRM_XE_PMU_ENGINE_EVENT(0xff, 0xff, 0xf). So will change it as you
suggested.
>>>
>>> But __DRM_XE_PMU_OTHER is not used any where so why to introduce in this patch.
>> I added it here so that it can be used in the next patch to check if its engine event.
>>
>> Will move total active ticks config also to the same patch so that all configs are in one patch.
>
> then you should define __DRM_XE_PMU_OTHER where active ticks is being introduced, not vice versa.
Okay, will move it to the total active ticks patch.
Thanks
Riana
>
> Thanks,
> Aravind.
>>
>> Thanks
>> Riana
>>
>>
>>>
>>> Thanks,
>>> Aravind.
>>>> +
>>>> +#define DRM_XE_PMU_ENGINE_BUSY_TICKS(gt, class, instance) \
>>>> + __DRM_XE_PMU_GT_EVENT(gt, __DRM_XE_PMU_ENGINE(class, instance, \
>>>> + DRM_XE_PMU_SAMPLE_BUSY_TICKS))
>>>> +
>>>> #if defined(__cplusplus)
>>>> }
>>>> #endif
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCH v4 3/8] drm/xe/uapi: Add configs for Engine busyness
2024-01-03 7:06 ` Riana Tauro
@ 2024-01-03 7:08 ` Riana Tauro
0 siblings, 0 replies; 23+ messages in thread
From: Riana Tauro @ 2024-01-03 7:08 UTC (permalink / raw)
To: Aravind Iddamsetty, intel-xe
On 1/3/2024 12:36 PM, Riana Tauro wrote:
>
>
> On 1/3/2024 12:32 PM, Aravind Iddamsetty wrote:
>>
>> On 1/3/24 12:10, Riana Tauro wrote:
>>>
>>> Hi Aravind
>>>
>>> On 1/3/2024 10:56 AM, Aravind Iddamsetty wrote:
>>>>
>>>> On 12/22/23 13:15, Riana Tauro wrote:
>>>>> GuC provides engine busyness ticks as a 64 bit counter which count
>>>>> as clock ticks.
>>>>>
>>>>> Add configs to the uapi to expose Engine busyness via PMU.
>>>>>
>>>>> v2: add "__" prefix for internal helpers
>>>>> add a simple helper for application usage (Aravind)
>>>>>
>>>>> v3: rebase
>>>>> change internal uapi pmu config helpers (Umesh)
>>>>>
>>>>> Cc: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
>>>>> Signed-off-by: Riana Tauro <riana.tauro@intel.com>
>>>>> ---
>>>>> include/uapi/drm/xe_drm.h | 41
>>>>> +++++++++++++++++++++++++++++++++++++++
>>>>> 1 file changed, 41 insertions(+)
>>>>>
>>>>> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>>>>> index 9fa3ae324731..f8456cda5cda 100644
>>>>> --- a/include/uapi/drm/xe_drm.h
>>>>> +++ b/include/uapi/drm/xe_drm.h
>>>>> @@ -1340,6 +1340,47 @@ struct drm_xe_wait_user_fence {
>>>>> __u64 reserved[2];
>>>>> };
>>>>> +/**
>>>>> + * DOC: XE PMU event config IDs
>>>>> + *
>>>>> + * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX
>>>>> listed in xe_drm.h
>>>>> + * in 'struct perf_event_attr' as part of perf_event_open syscall
>>>>> to read a
>>>>> + * particular event.
>>>>> + *
>>>>> + */
>>>>> +
>>>>> +/**
>>>>> + * enum drm_xe_pmu_engine_sample - Supported PMU engine samples
>>>>> + */
>>>>> +enum drm_xe_pmu_engine_sample {
>>>>> + /** @DRM_XE_PMU_SAMPLE_BUSY_TICKS: Engine busy ticks */
>>>>> + DRM_XE_PMU_SAMPLE_BUSY_TICKS = 0,
>>>>> +};
>>>>> +
>>>>> +/*
>>>>> + * Top bits of every counter are GT id.
>>>>> + */
>>>>> +#define __DRM_XE_PMU_GT_SHIFT (56)
>>>>> +#define __DRM_XE_PMU_SAMPLE_BITS (4)
>>>>> +#define __DRM_XE_PMU_SAMPLE_INSTANCE_BITS (8)
>>>>> +#define __DRM_XE_PMU_CLASS_SHIFT \
>>>>> + (__DRM_XE_PMU_SAMPLE_BITS + __DRM_XE_PMU_SAMPLE_INSTANCE_BITS)
>>>>> +
>>>>> +#define __DRM_XE_PMU_GT_EVENT(gt, x) \
>>>>> + (((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT))
>>>>> +
>>>>> +#define __DRM_XE_PMU_ENGINE(class, instance, sample) \
>>>>> + (((class) << __DRM_XE_PMU_CLASS_SHIFT | \
>>>>> + (instance) << __DRM_XE_PMU_SAMPLE_BITS | \
>>>>> + (sample)))
>>>>> +
>>>>> +#define __DRM_XE_PMU_OTHER(gt, x) \
>>>>> + (__DRM_XE_PMU_GT_EVENT(gt, 0xfffff) + 1 + (x))
>>>> Use __DRM_XE_PMU_ENGINE(0xff, 0xff, 0xf) instead of 0xfffff so that
>>>> it will be clear
>>>> that it is starting after the engine event.
>>> Previous comments from Umesh suggested to use 0xfffff so changed it
>>> from rev3.
>> looking back at the comment, Umesh corrected it to be 0xfffff, so
>> using as suggested by me above
>> looks to be more apt.
> Initially Group busyness took the first few configs, so i had added a
> different offset.
>
> Anyway ___DRM_XE_PMU_GT_EVENT(gt, 0xfffff) is same as
> __DRM_XE_PMU_ENGINE_EVENT(0xff, 0xff, 0xf). So will change it as you
> suggested.
>
0xfffff is same as __DRM_XE_PMU_ENGINE_EVENT(0xff, 0xff, 0xf).
So will add it as
___DRM_XE_PMU_GT_EVENT(gt, __DRM_XE_PMU_ENGINE_EVENT(0xff, 0xff, 0xf))
>>>>
>>>> But __DRM_XE_PMU_OTHER is not used any where so why to introduce in
>>>> this patch.
>>> I added it here so that it can be used in the next patch to check if
>>> its engine event.
>>>
>>> Will move total active ticks config also to the same patch so that
>>> all configs are in one patch.
>>
>> then you should define __DRM_XE_PMU_OTHER where active ticks is being
>> introduced, not vice versa.
> Okay, will move it to the total active ticks patch.
>
> Thanks
> Riana
>>
>> Thanks,
>> Aravind.
>>>
>>> Thanks
>>> Riana
>>>
>>>
>>>>
>>>> Thanks,
>>>> Aravind.
>>>>> +
>>>>> +#define DRM_XE_PMU_ENGINE_BUSY_TICKS(gt, class, instance) \
>>>>> + __DRM_XE_PMU_GT_EVENT(gt, __DRM_XE_PMU_ENGINE(class, instance, \
>>>>> + DRM_XE_PMU_SAMPLE_BUSY_TICKS))
>>>>> +
>>>>> #if defined(__cplusplus)
>>>>> }
>>>>> #endif
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v4 4/8] drm/xe/pmu: Enable PMU interface and add engine busyness counter
2023-12-22 7:45 [PATCH v4 0/8] Engine busyness Riana Tauro
` (2 preceding siblings ...)
2023-12-22 7:45 ` [PATCH v4 3/8] drm/xe/uapi: Add configs for Engine busyness Riana Tauro
@ 2023-12-22 7:45 ` Riana Tauro
2024-01-03 5:03 ` Aravind Iddamsetty
2023-12-22 7:45 ` [PATCH v4 5/8] drm/xe/guc: Add PMU counter for total active ticks Riana Tauro
` (3 subsequent siblings)
7 siblings, 1 reply; 23+ messages in thread
From: Riana Tauro @ 2023-12-22 7:45 UTC (permalink / raw)
To: intel-xe
From: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
This patch adds the PMU base implementation along with engine busyness
counters.
GuC provides engine busyness ticks as a 64 bit counter which count
as clock ticks. These counters are maintained in a
shared memory buffer and internally updated on a continuous basis.
This is listed by perf tool as
sudo ./perf list
xe_0000_03_00.0/bcs0-busy-ticks-gt0/ [Kernel PMU event]
xe_0000_03_00.0/ccs0-busy-ticks-gt0/ [Kernel PMU event]
xe_0000_03_00.0/rcs0-busy-ticks-gt0/ [Kernel PMU event]
xe_0000_03_00.0/vcs0-busy-ticks-gt0/ [Kernel PMU event]
xe_0000_03_00.0/vecs0-busy-ticks-gt0/ [Kernel PMU event]
and read as
sudo ./perf stat -e xe_0000_03_00.0/bcs0-busy-ticks-gt0/ -I 1000
time counts unit events
1.000674178 2052 xe_0000_03_00.0/bcs0-busy-ticks-gt0/
2.006626312 2033 xe_0000_03_00.0/bcs0-busy-ticks-gt0/
3.009499300 40067 xe_0000_03_00.0/bcs0-busy-ticks-gt0/
4.010521486 8491 xe_0000_03_00.0/bcs0-busy-ticks-gt0/
The pmu base implementation is taken from i915.
v2: rebase
v3: add engine busyness
v4: change internal uapi helpers (Umesh)
Co-developed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Co-developed-by: Bommu Krishnaiah <krishnaiah.bommu@intel.com>
Signed-off-by: Bommu Krishnaiah <krishnaiah.bommu@intel.com>
Co-developed-by: Riana Tauro <riana.tauro@intel.com>
Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
---
drivers/gpu/drm/xe/Makefile | 2 +
drivers/gpu/drm/xe/xe_device.c | 2 +
drivers/gpu/drm/xe/xe_device_types.h | 4 +
drivers/gpu/drm/xe/xe_module.c | 5 +
drivers/gpu/drm/xe/xe_pmu.c | 543 +++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_pmu.h | 23 ++
drivers/gpu/drm/xe/xe_pmu_types.h | 49 +++
7 files changed, 628 insertions(+)
create mode 100644 drivers/gpu/drm/xe/xe_pmu.c
create mode 100644 drivers/gpu/drm/xe/xe_pmu.h
create mode 100644 drivers/gpu/drm/xe/xe_pmu_types.h
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 26988c1c732a..4cd8bae783ab 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -296,6 +296,8 @@ endif
obj-$(CONFIG_DRM_XE) += xe.o
obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
+xe-$(CONFIG_PERF_EVENTS) += xe_pmu.o
+
# header test
hdrtest_find_args := -not -path xe_rtp_helpers.h
ifneq ($(CONFIG_DRM_XE_DISPLAY),y)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 86867d42d532..a7388aeffc28 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -538,6 +538,8 @@ int xe_device_probe(struct xe_device *xe)
xe_hwmon_register(xe);
+ xe_pmu_register(&xe->pmu);
+
err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 71f23ac365e6..c738c03ded88 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -19,6 +19,7 @@
#include "xe_memirq_types.h"
#include "xe_platform_types.h"
#include "xe_pt_types.h"
+#include "xe_pmu.h"
#include "xe_sriov_types.h"
#include "xe_step_types.h"
@@ -479,6 +480,9 @@ struct xe_device {
/* To shut up runtime pm macros.. */
struct xe_runtime_pm {} runtime_pm;
+ /** @pmu: performance monitoring unit */
+ struct xe_pmu pmu;
+
/* For pcode */
struct mutex sb_lock;
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 110b69864656..51bf69b7ab22 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -11,6 +11,7 @@
#include "xe_drv.h"
#include "xe_hw_fence.h"
#include "xe_pci.h"
+#include "xe_pmu.h"
#include "xe_sched_job.h"
struct xe_modparam xe_modparam = {
@@ -62,6 +63,10 @@ static const struct init_funcs init_funcs[] = {
.init = xe_sched_job_module_init,
.exit = xe_sched_job_module_exit,
},
+ {
+ .init = xe_pmu_init,
+ .exit = xe_pmu_exit,
+ },
{
.init = xe_register_pci_driver,
.exit = xe_unregister_pci_driver,
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
new file mode 100644
index 000000000000..371ca6d7e215
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -0,0 +1,543 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+
+#define XE_ENGINE_SAMPLE_COUNT (DRM_XE_PMU_SAMPLE_BUSY_TICKS + 1)
+
+static cpumask_t xe_pmu_cpumask;
+static unsigned int xe_pmu_target_cpu = -1;
+
+static unsigned int config_gt_id(const u64 config)
+{
+ return config >> __DRM_XE_PMU_GT_SHIFT;
+}
+
+static u64 config_counter(const u64 config)
+{
+ return config & ~(~0ULL << __DRM_XE_PMU_GT_SHIFT);
+}
+
+static u8 engine_event_sample(struct perf_event *event)
+{
+ u64 config = event->attr.config;
+
+ return config_counter(config) & 0xf;
+}
+
+static u8 engine_event_class(struct perf_event *event)
+{
+ u64 config = event->attr.config;
+
+ return (config_counter(config) >> __DRM_XE_PMU_CLASS_SHIFT) & 0xff;
+}
+
+static u8 engine_event_instance(struct perf_event *event)
+{
+ u64 config = event->attr.config;
+
+ return (config_counter(config) >> __DRM_XE_PMU_SAMPLE_BITS) & 0xff;
+}
+
+static bool is_engine_event(struct perf_event *event)
+{
+ return config_counter(event->attr.config) < __DRM_XE_PMU_OTHER(0, 0);
+}
+
+static int engine_event_status(struct xe_hw_engine *hwe,
+ enum drm_xe_pmu_engine_sample sample)
+{
+ if (!hwe)
+ return -ENODEV;
+
+ /* Other engine events will be added, XE_ENGINE_SAMPLE_COUNT will be changed */
+ return (sample >= DRM_XE_PMU_SAMPLE_BUSY_TICKS && sample < XE_ENGINE_SAMPLE_COUNT)
+ ? 0 : -ENOENT;
+}
+
+static int engine_event_init(struct perf_event *event)
+{
+ struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
+ const u64 config = event->attr.config;
+ const unsigned int gt_id = config_gt_id(config);
+ struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+ struct xe_hw_engine *hwe;
+
+ hwe = xe_gt_hw_engine(gt, xe_hw_engine_from_user_class(engine_event_class(event)),
+ engine_event_instance(event), true);
+
+ return engine_event_status(hwe, engine_event_sample(event));
+}
+
+static void xe_pmu_event_destroy(struct perf_event *event)
+{
+ struct xe_device *xe =
+ container_of(event->pmu, typeof(*xe), pmu.base);
+
+ drm_WARN_ON(&xe->drm, event->parent);
+
+ drm_dev_put(&xe->drm);
+}
+
+static int xe_pmu_event_init(struct perf_event *event)
+{
+ struct xe_device *xe =
+ container_of(event->pmu, typeof(*xe), pmu.base);
+ struct xe_pmu *pmu = &xe->pmu;
+ int ret;
+
+ if (pmu->closed)
+ return -ENODEV;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* unsupported modes and filters */
+ if (event->attr.sample_period) /* no sampling */
+ return -EINVAL;
+
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ /* only allow running on one cpu at a time */
+ if (!cpumask_test_cpu(event->cpu, &xe_pmu_cpumask))
+ return -EINVAL;
+
+ if (is_engine_event(event)) {
+ ret = engine_event_init(event);
+ if (ret)
+ return ret;
+ }
+
+ if (!event->parent) {
+ drm_dev_get(&xe->drm);
+ event->destroy = xe_pmu_event_destroy;
+ }
+
+ return 0;
+}
+
+static u64 __xe_pmu_event_read(struct perf_event *event)
+{
+ struct xe_device *xe =
+ container_of(event->pmu, typeof(*xe), pmu.base);
+ const unsigned int gt_id = config_gt_id(event->attr.config);
+ struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+ u64 val;
+
+ if (is_engine_event(event)) {
+ u8 sample = engine_event_sample(event);
+ struct xe_hw_engine *hwe;
+
+ hwe = xe_gt_hw_engine(gt, xe_hw_engine_from_user_class(engine_event_class(event)),
+ engine_event_instance(event), true);
+ if (!hwe)
+ drm_WARN_ON_ONCE(&xe->drm, "unknown engine\n");
+ else if (sample == DRM_XE_PMU_SAMPLE_BUSY_TICKS)
+ val = xe_gt_engine_busy_ticks(gt, hwe);
+ else
+ drm_warn(&xe->drm, "unknown pmu engine event\n");
+ }
+
+ return val;
+}
+
+static void xe_pmu_event_read(struct perf_event *event)
+{
+ struct xe_device *xe =
+ container_of(event->pmu, typeof(*xe), pmu.base);
+ struct hw_perf_event *hwc = &event->hw;
+ struct xe_pmu *pmu = &xe->pmu;
+ u64 prev, new;
+
+ if (pmu->closed) {
+ event->hw.state = PERF_HES_STOPPED;
+ return;
+ }
+again:
+ prev = local64_read(&hwc->prev_count);
+ new = __xe_pmu_event_read(event);
+
+ if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
+ goto again;
+
+ local64_add(new - prev, &event->count);
+}
+
+static void xe_pmu_enable(struct perf_event *event)
+{
+ /*
+ * Store the current counter value so we can report the correct delta
+ * for all listeners. Even when the event was already enabled and has
+ * an existing non-zero value.
+ */
+ local64_set(&event->hw.prev_count, __xe_pmu_event_read(event));
+}
+
+static void xe_pmu_event_start(struct perf_event *event, int flags)
+{
+ struct xe_device *xe =
+ container_of(event->pmu, typeof(*xe), pmu.base);
+ struct xe_pmu *pmu = &xe->pmu;
+
+ if (pmu->closed)
+ return;
+
+ xe_pmu_enable(event);
+ event->hw.state = 0;
+}
+
+static void xe_pmu_event_stop(struct perf_event *event, int flags)
+{
+ if (flags & PERF_EF_UPDATE)
+ xe_pmu_event_read(event);
+
+ event->hw.state = PERF_HES_STOPPED;
+}
+
+static int xe_pmu_event_add(struct perf_event *event, int flags)
+{
+ struct xe_device *xe =
+ container_of(event->pmu, typeof(*xe), pmu.base);
+ struct xe_pmu *pmu = &xe->pmu;
+
+ if (pmu->closed)
+ return -ENODEV;
+
+ if (flags & PERF_EF_START)
+ xe_pmu_event_start(event, flags);
+
+ return 0;
+}
+
+static void xe_pmu_event_del(struct perf_event *event, int flags)
+{
+ xe_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+struct xe_ext_attribute {
+ struct device_attribute attr;
+ unsigned long val;
+};
+
+static ssize_t xe_pmu_event_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct xe_ext_attribute *eattr;
+
+ eattr = container_of(attr, struct xe_ext_attribute, attr);
+ return sprintf(buf, "config=0x%lx\n", eattr->val);
+}
+
+static ssize_t cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &xe_pmu_cpumask);
+}
+
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *xe_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static const struct attribute_group xe_pmu_cpumask_attr_group = {
+ .attrs = xe_cpumask_attrs,
+};
+
+#define __engine_event(__sample, __name) \
+{ \
+ .sample = (__sample), \
+ .name = (__name), \
+}
+
+static struct xe_ext_attribute *
+add_xe_attr(struct xe_ext_attribute *attr, const char *name, u64 config)
+{
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = name;
+ attr->attr.attr.mode = 0444;
+ attr->attr.show = xe_pmu_event_show;
+ attr->val = config;
+
+ return ++attr;
+}
+
+static struct attribute **
+create_event_attributes(struct xe_pmu *pmu)
+{
+ struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
+ struct xe_ext_attribute *xe_attr = NULL, *xe_iter;
+ struct attribute **attr = NULL, **attr_iter;
+ unsigned int count = 0;
+ enum xe_hw_engine_id id;
+ unsigned int i, j;
+ struct xe_hw_engine *hwe;
+ struct xe_gt *gt;
+
+ static const struct {
+ enum drm_xe_pmu_engine_sample sample;
+ char *name;
+ } engine_events[] = {
+ __engine_event(DRM_XE_PMU_SAMPLE_BUSY_TICKS, "busy-ticks"),
+ };
+
+ for_each_gt(gt, xe, j) {
+ for_each_hw_engine(hwe, gt, id) {
+ for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
+ if (!engine_event_status(hwe, engine_events[i].sample))
+ count++;
+ }
+ }
+ }
+
+ /* Allocate attribute objects and table. */
+ xe_attr = kcalloc(count, sizeof(*xe_attr), GFP_KERNEL);
+ if (!xe_attr)
+ goto err_alloc;
+
+ /* Max one pointer of each attribute type plus a termination entry. */
+ attr = kcalloc(count + 1, sizeof(*attr), GFP_KERNEL);
+ if (!attr)
+ goto err_alloc;
+
+ xe_iter = xe_attr;
+ attr_iter = attr;
+
+ /* Initialize supported engine counters */
+ for_each_gt(gt, xe, j) {
+ for_each_hw_engine(hwe, gt, id) {
+ for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
+ char *str;
+
+ if (engine_event_status(hwe, engine_events[i].sample))
+ continue;
+
+ str = kasprintf(GFP_KERNEL, "%s-%s-gt%u",
+ hwe->name, engine_events[i].name, j);
+
+ if (!str)
+ goto err;
+
+ *attr_iter++ = &xe_iter->attr.attr;
+ xe_iter = add_xe_attr(xe_iter, str,
+ __DRM_XE_PMU_GT_EVENT(j, __DRM_XE_PMU_ENGINE(xe_hw_engine_to_user_class(hwe->class),
+ hwe->logical_instance,
+ engine_events[i].sample)));
+ }
+ }
+ }
+
+ pmu->xe_attr = xe_attr;
+ return attr;
+
+err:
+ for (attr_iter = attr; *attr_iter; attr_iter++)
+ kfree((*attr_iter)->name);
+
+err_alloc:
+ kfree(attr);
+ kfree(xe_attr);
+
+ return NULL;
+}
+
+static void free_event_attributes(struct xe_pmu *pmu)
+{
+ struct attribute **attr_iter = pmu->events_attr_group.attrs;
+
+ for (; *attr_iter; attr_iter++)
+ kfree((*attr_iter)->name);
+
+ kfree(pmu->events_attr_group.attrs);
+ kfree(pmu->xe_attr);
+
+ pmu->events_attr_group.attrs = NULL;
+ pmu->xe_attr = NULL;
+}
+
+static int xe_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+ struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
+
+ /* Select the first online CPU as a designated reader. */
+ if (cpumask_empty(&xe_pmu_cpumask))
+ cpumask_set_cpu(cpu, &xe_pmu_cpumask);
+
+ return 0;
+}
+
+static int xe_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
+{
+ struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
+ unsigned int target = xe_pmu_target_cpu;
+
+ /*
+ * Unregistering an instance generates a CPU offline event which we must
+ * ignore to avoid incorrectly modifying the shared xe_pmu_cpumask.
+ */
+ if (pmu->closed)
+ return 0;
+
+ if (cpumask_test_and_clear_cpu(cpu, &xe_pmu_cpumask)) {
+ target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+
+ /* Migrate events if there is a valid target */
+ if (target < nr_cpu_ids) {
+ cpumask_set_cpu(target, &xe_pmu_cpumask);
+ xe_pmu_target_cpu = target;
+ }
+ }
+
+ if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
+ perf_pmu_migrate_context(&pmu->base, cpu, target);
+ pmu->cpuhp.cpu = target;
+ }
+
+ return 0;
+}
+
+static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
+
+int xe_pmu_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "perf/x86/intel/xe:online",
+ xe_pmu_cpu_online,
+ xe_pmu_cpu_offline);
+ if (ret < 0)
+ pr_notice("Failed to setup cpuhp state for xe PMU! (%d)\n",
+ ret);
+ else
+ cpuhp_slot = ret;
+
+ return 0;
+}
+
+void xe_pmu_exit(void)
+{
+ if (cpuhp_slot != CPUHP_INVALID)
+ cpuhp_remove_multi_state(cpuhp_slot);
+}
+
+static int xe_pmu_register_cpuhp_state(struct xe_pmu *pmu)
+{
+ if (cpuhp_slot == CPUHP_INVALID)
+ return -EINVAL;
+
+ return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
+}
+
+static void xe_pmu_unregister_cpuhp_state(struct xe_pmu *pmu)
+{
+ cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
+}
+
+static void xe_pmu_unregister(struct drm_device *device, void *arg)
+{
+ struct xe_pmu *pmu = arg;
+
+ if (!pmu->base.event_init)
+ return;
+
+ /*
+ * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu
+ * ensures all currently executing ones will have exited before we
+ * proceed with unregistration.
+ */
+ pmu->closed = true;
+ synchronize_rcu();
+
+ xe_pmu_unregister_cpuhp_state(pmu);
+
+ perf_pmu_unregister(&pmu->base);
+ pmu->base.event_init = NULL;
+ kfree(pmu->base.attr_groups);
+ kfree(pmu->name);
+ free_event_attributes(pmu);
+}
+
+void xe_pmu_register(struct xe_pmu *pmu)
+{
+ struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
+ const struct attribute_group *attr_groups[] = {
+ &pmu->events_attr_group,
+ &xe_pmu_cpumask_attr_group,
+ NULL
+ };
+
+ int ret = -ENOMEM;
+
+ spin_lock_init(&pmu->lock);
+ pmu->cpuhp.cpu = -1;
+
+ pmu->name = kasprintf(GFP_KERNEL,
+ "xe_%s",
+ dev_name(xe->drm.dev));
+ if (pmu->name)
+ /* tools/perf reserves colons as special. */
+ strreplace((char *)pmu->name, ':', '_');
+
+ if (!pmu->name)
+ goto err;
+
+ pmu->events_attr_group.name = "events";
+ pmu->events_attr_group.attrs = create_event_attributes(pmu);
+ if (!pmu->events_attr_group.attrs)
+ goto err_name;
+
+ pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
+ GFP_KERNEL);
+ if (!pmu->base.attr_groups)
+ goto err_attr;
+
+ pmu->base.module = THIS_MODULE;
+ pmu->base.task_ctx_nr = perf_invalid_context;
+ pmu->base.event_init = xe_pmu_event_init;
+ pmu->base.add = xe_pmu_event_add;
+ pmu->base.del = xe_pmu_event_del;
+ pmu->base.start = xe_pmu_event_start;
+ pmu->base.stop = xe_pmu_event_stop;
+ pmu->base.read = xe_pmu_event_read;
+
+ ret = perf_pmu_register(&pmu->base, pmu->name, -1);
+ if (ret)
+ goto err_groups;
+
+ ret = xe_pmu_register_cpuhp_state(pmu);
+ if (ret)
+ goto err_unreg;
+
+ ret = drmm_add_action_or_reset(&xe->drm, xe_pmu_unregister, pmu);
+ if (ret)
+ goto err_cpuhp;
+
+ return;
+
+err_cpuhp:
+ xe_pmu_unregister_cpuhp_state(pmu);
+err_unreg:
+ perf_pmu_unregister(&pmu->base);
+err_groups:
+ kfree(pmu->base.attr_groups);
+err_attr:
+ pmu->base.event_init = NULL;
+ free_event_attributes(pmu);
+err_name:
+ kfree(pmu->name);
+err:
+ drm_notice(&xe->drm, "Failed to register PMU!\n");
+}
diff --git a/drivers/gpu/drm/xe/xe_pmu.h b/drivers/gpu/drm/xe/xe_pmu.h
new file mode 100644
index 000000000000..d6fca18466f4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pmu.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_PMU_H_
+#define _XE_PMU_H_
+
+#include "xe_gt_types.h"
+#include "xe_pmu_types.h"
+
+#if IS_ENABLED(CONFIG_PERF_EVENTS)
+int xe_pmu_init(void);
+void xe_pmu_exit(void);
+void xe_pmu_register(struct xe_pmu *pmu);
+#else
+static inline int xe_pmu_init(void) { return 0; }
+static inline void xe_pmu_exit(void) {}
+static inline void xe_pmu_register(struct xe_pmu *pmu) {}
+#endif
+
+#endif
+
diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h
new file mode 100644
index 000000000000..d38b24d27cfd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pmu_types.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_PMU_TYPES_H_
+#define _XE_PMU_TYPES_H_
+
+#include <linux/perf_event.h>
+#include <linux/spinlock_types.h>
+#include <uapi/drm/xe_drm.h>
+
+#define XE_PMU_MAX_GT 2
+
+struct xe_pmu {
+ /**
+ * @cpuhp: Struct used for CPU hotplug handling.
+ */
+ struct {
+ struct hlist_node node;
+ unsigned int cpu;
+ } cpuhp;
+ /**
+ * @base: PMU base.
+ */
+ struct pmu base;
+ /**
+ * @closed: xe is unregistering.
+ */
+ bool closed;
+ /**
+ * @name: Name as registered with perf core.
+ */
+ const char *name;
+ /**
+ * @lock: Lock protecting enable mask and ref count handling.
+ */
+ spinlock_t lock;
+ /**
+ * @events_attr_group: Device events attribute group.
+ */
+ struct attribute_group events_attr_group;
+ /**
+ * @xe_attr: Memory block holding device attributes.
+ */
+ void *xe_attr;
+};
+
+#endif
--
2.40.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH v4 4/8] drm/xe/pmu: Enable PMU interface and add engine busyness counter
2023-12-22 7:45 ` [PATCH v4 4/8] drm/xe/pmu: Enable PMU interface and add engine busyness counter Riana Tauro
@ 2024-01-03 5:03 ` Aravind Iddamsetty
[not found] ` <85zfxnrlv0.wl-ashutosh.dixit@intel.com>
0 siblings, 1 reply; 23+ messages in thread
From: Aravind Iddamsetty @ 2024-01-03 5:03 UTC (permalink / raw)
To: Riana Tauro, intel-xe, Dixit, Ashutosh
On 12/22/23 13:15, Riana Tauro wrote:
Hi Riana,
If we split this up into two patches (infra + engine busyness), I believe we can
retain the R-b for the infra part from the earlier series, as it has been extensively reviewed.
Ashutosh any thoughts?
Thanks,
Aravind.
> From: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
>
> This patch adds the PMU base implementation along with engine busyness
> counters.
>
> GuC provides engine busyness ticks as a 64 bit counter which count
> as clock ticks. These counters are maintained in a
> shared memory buffer and internally updated on a continuous basis.
>
> This is listed by perf tool as
>
> sudo ./perf list
> xe_0000_03_00.0/bcs0-busy-ticks-gt0/ [Kernel PMU event]
> xe_0000_03_00.0/ccs0-busy-ticks-gt0/ [Kernel PMU event]
> xe_0000_03_00.0/rcs0-busy-ticks-gt0/ [Kernel PMU event]
> xe_0000_03_00.0/vcs0-busy-ticks-gt0/ [Kernel PMU event]
> xe_0000_03_00.0/vecs0-busy-ticks-gt0/ [Kernel PMU event]
>
> and read as
>
> sudo ./perf stat -e xe_0000_03_00.0/bcs0-busy-ticks-gt0/ -I 1000
> time counts unit events
> 1.000674178 2052 xe_0000_03_00.0/bcs0-busy-ticks-gt0/
> 2.006626312 2033 xe_0000_03_00.0/bcs0-busy-ticks-gt0/
> 3.009499300 40067 xe_0000_03_00.0/bcs0-busy-ticks-gt0/
> 4.010521486 8491 xe_0000_03_00.0/bcs0-busy-ticks-gt0/
>
> The pmu base implementation is taken from i915.
>
> v2: rebase
>
> v3: add engine busyness
>
> v4: change internal uapi helpers (Umesh)
>
> Co-developed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Co-developed-by: Bommu Krishnaiah <krishnaiah.bommu@intel.com>
> Signed-off-by: Bommu Krishnaiah <krishnaiah.bommu@intel.com>
> Co-developed-by: Riana Tauro <riana.tauro@intel.com>
> Signed-off-by: Riana Tauro <riana.tauro@intel.com>
> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
> ---
> drivers/gpu/drm/xe/Makefile | 2 +
> drivers/gpu/drm/xe/xe_device.c | 2 +
> drivers/gpu/drm/xe/xe_device_types.h | 4 +
> drivers/gpu/drm/xe/xe_module.c | 5 +
> drivers/gpu/drm/xe/xe_pmu.c | 543 +++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_pmu.h | 23 ++
> drivers/gpu/drm/xe/xe_pmu_types.h | 49 +++
> 7 files changed, 628 insertions(+)
> create mode 100644 drivers/gpu/drm/xe/xe_pmu.c
> create mode 100644 drivers/gpu/drm/xe/xe_pmu.h
> create mode 100644 drivers/gpu/drm/xe/xe_pmu_types.h
>
> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> index 26988c1c732a..4cd8bae783ab 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -296,6 +296,8 @@ endif
> obj-$(CONFIG_DRM_XE) += xe.o
> obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
>
> +xe-$(CONFIG_PERF_EVENTS) += xe_pmu.o
> +
> # header test
> hdrtest_find_args := -not -path xe_rtp_helpers.h
> ifneq ($(CONFIG_DRM_XE_DISPLAY),y)
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 86867d42d532..a7388aeffc28 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -538,6 +538,8 @@ int xe_device_probe(struct xe_device *xe)
>
> xe_hwmon_register(xe);
>
> + xe_pmu_register(&xe->pmu);
> +
> err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
> if (err)
> return err;
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 71f23ac365e6..c738c03ded88 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -19,6 +19,7 @@
> #include "xe_memirq_types.h"
> #include "xe_platform_types.h"
> #include "xe_pt_types.h"
> +#include "xe_pmu.h"
> #include "xe_sriov_types.h"
> #include "xe_step_types.h"
>
> @@ -479,6 +480,9 @@ struct xe_device {
> /* To shut up runtime pm macros.. */
> struct xe_runtime_pm {} runtime_pm;
>
> + /** @pmu: performance monitoring unit */
> + struct xe_pmu pmu;
> +
> /* For pcode */
> struct mutex sb_lock;
>
> diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
> index 110b69864656..51bf69b7ab22 100644
> --- a/drivers/gpu/drm/xe/xe_module.c
> +++ b/drivers/gpu/drm/xe/xe_module.c
> @@ -11,6 +11,7 @@
> #include "xe_drv.h"
> #include "xe_hw_fence.h"
> #include "xe_pci.h"
> +#include "xe_pmu.h"
> #include "xe_sched_job.h"
>
> struct xe_modparam xe_modparam = {
> @@ -62,6 +63,10 @@ static const struct init_funcs init_funcs[] = {
> .init = xe_sched_job_module_init,
> .exit = xe_sched_job_module_exit,
> },
> + {
> + .init = xe_pmu_init,
> + .exit = xe_pmu_exit,
> + },
> {
> .init = xe_register_pci_driver,
> .exit = xe_unregister_pci_driver,
> diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
> new file mode 100644
> index 000000000000..371ca6d7e215
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_pmu.c
> @@ -0,0 +1,543 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2023 Intel Corporation
> + */
> +
> +#include <drm/drm_drv.h>
> +#include <drm/drm_managed.h>
> +#include <drm/xe_drm.h>
> +
> +#include "xe_device.h"
> +#include "xe_gt.h"
> +
> +#define XE_ENGINE_SAMPLE_COUNT (DRM_XE_PMU_SAMPLE_BUSY_TICKS + 1)
> +
> +static cpumask_t xe_pmu_cpumask;
> +static unsigned int xe_pmu_target_cpu = -1;
> +
> +static unsigned int config_gt_id(const u64 config)
> +{
> + return config >> __DRM_XE_PMU_GT_SHIFT;
> +}
> +
> +static u64 config_counter(const u64 config)
> +{
> + return config & ~(~0ULL << __DRM_XE_PMU_GT_SHIFT);
> +}
> +
> +static u8 engine_event_sample(struct perf_event *event)
> +{
> + u64 config = event->attr.config;
> +
> + return config_counter(config) & 0xf;
> +}
> +
> +static u8 engine_event_class(struct perf_event *event)
> +{
> + u64 config = event->attr.config;
> +
> + return (config_counter(config) >> __DRM_XE_PMU_CLASS_SHIFT) & 0xff;
> +}
> +
> +static u8 engine_event_instance(struct perf_event *event)
> +{
> + u64 config = event->attr.config;
> +
> + return (config_counter(config) >> __DRM_XE_PMU_SAMPLE_BITS) & 0xff;
> +}
> +
> +static bool is_engine_event(struct perf_event *event)
> +{
> + return config_counter(event->attr.config) < __DRM_XE_PMU_OTHER(0, 0);
> +}
> +
> +static int engine_event_status(struct xe_hw_engine *hwe,
> + enum drm_xe_pmu_engine_sample sample)
> +{
> + if (!hwe)
> + return -ENODEV;
> +
> + /* Other engine events will be added, XE_ENGINE_SAMPLE_COUNT will be changed */
> + return (sample >= DRM_XE_PMU_SAMPLE_BUSY_TICKS && sample < XE_ENGINE_SAMPLE_COUNT)
> + ? 0 : -ENOENT;
> +}
> +
> +static int engine_event_init(struct perf_event *event)
> +{
> + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
> + const u64 config = event->attr.config;
> + const unsigned int gt_id = config_gt_id(config);
> + struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
> + struct xe_hw_engine *hwe;
> +
> + hwe = xe_gt_hw_engine(gt, xe_hw_engine_from_user_class(engine_event_class(event)),
> + engine_event_instance(event), true);
> +
> + return engine_event_status(hwe, engine_event_sample(event));
> +}
> +
> +static void xe_pmu_event_destroy(struct perf_event *event)
> +{
> + struct xe_device *xe =
> + container_of(event->pmu, typeof(*xe), pmu.base);
> +
> + drm_WARN_ON(&xe->drm, event->parent);
> +
> + drm_dev_put(&xe->drm);
> +}
> +
> +static int xe_pmu_event_init(struct perf_event *event)
> +{
> + struct xe_device *xe =
> + container_of(event->pmu, typeof(*xe), pmu.base);
> + struct xe_pmu *pmu = &xe->pmu;
> + int ret;
> +
> + if (pmu->closed)
> + return -ENODEV;
> +
> + if (event->attr.type != event->pmu->type)
> + return -ENOENT;
> +
> + /* unsupported modes and filters */
> + if (event->attr.sample_period) /* no sampling */
> + return -EINVAL;
> +
> + if (has_branch_stack(event))
> + return -EOPNOTSUPP;
> +
> + if (event->cpu < 0)
> + return -EINVAL;
> +
> + /* only allow running on one cpu at a time */
> + if (!cpumask_test_cpu(event->cpu, &xe_pmu_cpumask))
> + return -EINVAL;
> +
> + if (is_engine_event(event)) {
> + ret = engine_event_init(event);
> + if (ret)
> + return ret;
> + }
> +
> + if (!event->parent) {
> + drm_dev_get(&xe->drm);
> + event->destroy = xe_pmu_event_destroy;
> + }
> +
> + return 0;
> +}
> +
> +static u64 __xe_pmu_event_read(struct perf_event *event)
> +{
> + struct xe_device *xe =
> + container_of(event->pmu, typeof(*xe), pmu.base);
> + const unsigned int gt_id = config_gt_id(event->attr.config);
> + struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
> + u64 val;
> +
> + if (is_engine_event(event)) {
> + u8 sample = engine_event_sample(event);
> + struct xe_hw_engine *hwe;
> +
> + hwe = xe_gt_hw_engine(gt, xe_hw_engine_from_user_class(engine_event_class(event)),
> + engine_event_instance(event), true);
> + if (!hwe)
> + drm_WARN_ON_ONCE(&xe->drm, "unknown engine\n");
> + else if (sample == DRM_XE_PMU_SAMPLE_BUSY_TICKS)
> + val = xe_gt_engine_busy_ticks(gt, hwe);
> + else
> + drm_warn(&xe->drm, "unknown pmu engine event\n");
> + }
> +
> + return val;
> +}
> +
> +static void xe_pmu_event_read(struct perf_event *event)
> +{
> + struct xe_device *xe =
> + container_of(event->pmu, typeof(*xe), pmu.base);
> + struct hw_perf_event *hwc = &event->hw;
> + struct xe_pmu *pmu = &xe->pmu;
> + u64 prev, new;
> +
> + if (pmu->closed) {
> + event->hw.state = PERF_HES_STOPPED;
> + return;
> + }
> +again:
> + prev = local64_read(&hwc->prev_count);
> + new = __xe_pmu_event_read(event);
> +
> + if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
> + goto again;
> +
> + local64_add(new - prev, &event->count);
> +}
> +
> +static void xe_pmu_enable(struct perf_event *event)
> +{
> + /*
> + * Store the current counter value so we can report the correct delta
> + * for all listeners. Even when the event was already enabled and has
> + * an existing non-zero value.
> + */
> + local64_set(&event->hw.prev_count, __xe_pmu_event_read(event));
> +}
> +
> +static void xe_pmu_event_start(struct perf_event *event, int flags)
> +{
> + struct xe_device *xe =
> + container_of(event->pmu, typeof(*xe), pmu.base);
> + struct xe_pmu *pmu = &xe->pmu;
> +
> + if (pmu->closed)
> + return;
> +
> + xe_pmu_enable(event);
> + event->hw.state = 0;
> +}
> +
> +static void xe_pmu_event_stop(struct perf_event *event, int flags)
> +{
> + if (flags & PERF_EF_UPDATE)
> + xe_pmu_event_read(event);
> +
> + event->hw.state = PERF_HES_STOPPED;
> +}
> +
> +static int xe_pmu_event_add(struct perf_event *event, int flags)
> +{
> + struct xe_device *xe =
> + container_of(event->pmu, typeof(*xe), pmu.base);
> + struct xe_pmu *pmu = &xe->pmu;
> +
> + if (pmu->closed)
> + return -ENODEV;
> +
> + if (flags & PERF_EF_START)
> + xe_pmu_event_start(event, flags);
> +
> + return 0;
> +}
> +
> +static void xe_pmu_event_del(struct perf_event *event, int flags)
> +{
> + xe_pmu_event_stop(event, PERF_EF_UPDATE);
> +}
> +
> +struct xe_ext_attribute {
> + struct device_attribute attr;
> + unsigned long val;
> +};
> +
> +static ssize_t xe_pmu_event_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + struct xe_ext_attribute *eattr;
> +
> + eattr = container_of(attr, struct xe_ext_attribute, attr);
> + return sprintf(buf, "config=0x%lx\n", eattr->val);
> +}
> +
> +static ssize_t cpumask_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + return cpumap_print_to_pagebuf(true, buf, &xe_pmu_cpumask);
> +}
> +
> +static DEVICE_ATTR_RO(cpumask);
> +
> +static struct attribute *xe_cpumask_attrs[] = {
> + &dev_attr_cpumask.attr,
> + NULL,
> +};
> +
> +static const struct attribute_group xe_pmu_cpumask_attr_group = {
> + .attrs = xe_cpumask_attrs,
> +};
> +
> +#define __engine_event(__sample, __name) \
> +{ \
> + .sample = (__sample), \
> + .name = (__name), \
> +}
> +
> +static struct xe_ext_attribute *
> +add_xe_attr(struct xe_ext_attribute *attr, const char *name, u64 config)
> +{
> + sysfs_attr_init(&attr->attr.attr);
> + attr->attr.attr.name = name;
> + attr->attr.attr.mode = 0444;
> + attr->attr.show = xe_pmu_event_show;
> + attr->val = config;
> +
> + return ++attr;
> +}
> +
> +static struct attribute **
> +create_event_attributes(struct xe_pmu *pmu)
> +{
> + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
> + struct xe_ext_attribute *xe_attr = NULL, *xe_iter;
> + struct attribute **attr = NULL, **attr_iter;
> + unsigned int count = 0;
> + enum xe_hw_engine_id id;
> + unsigned int i, j;
> + struct xe_hw_engine *hwe;
> + struct xe_gt *gt;
> +
> + static const struct {
> + enum drm_xe_pmu_engine_sample sample;
> + char *name;
> + } engine_events[] = {
> + __engine_event(DRM_XE_PMU_SAMPLE_BUSY_TICKS, "busy-ticks"),
> + };
> +
> + for_each_gt(gt, xe, j) {
> + for_each_hw_engine(hwe, gt, id) {
> + for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
> + if (!engine_event_status(hwe, engine_events[i].sample))
> + count++;
> + }
> + }
> + }
> +
> + /* Allocate attribute objects and table. */
> + xe_attr = kcalloc(count, sizeof(*xe_attr), GFP_KERNEL);
> + if (!xe_attr)
> + goto err_alloc;
> +
> + /* Max one pointer of each attribute type plus a termination entry. */
> + attr = kcalloc(count + 1, sizeof(*attr), GFP_KERNEL);
> + if (!attr)
> + goto err_alloc;
> +
> + xe_iter = xe_attr;
> + attr_iter = attr;
> +
> + /* Initialize supported engine counters */
> + for_each_gt(gt, xe, j) {
> + for_each_hw_engine(hwe, gt, id) {
> + for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
> + char *str;
> +
> + if (engine_event_status(hwe, engine_events[i].sample))
> + continue;
> +
> + str = kasprintf(GFP_KERNEL, "%s-%s-gt%u",
> + hwe->name, engine_events[i].name, j);
> +
> + if (!str)
> + goto err;
> +
> + *attr_iter++ = &xe_iter->attr.attr;
> + xe_iter = add_xe_attr(xe_iter, str,
> + __DRM_XE_PMU_GT_EVENT(j, __DRM_XE_PMU_ENGINE(xe_hw_engine_to_user_class(hwe->class),
> + hwe->logical_instance,
> + engine_events[i].sample)));
> + }
> + }
> + }
> +
> + pmu->xe_attr = xe_attr;
> + return attr;
> +
> +err:
> + for (attr_iter = attr; *attr_iter; attr_iter++)
> + kfree((*attr_iter)->name);
> +
> +err_alloc:
> + kfree(attr);
> + kfree(xe_attr);
> +
> + return NULL;
> +}
> +
> +static void free_event_attributes(struct xe_pmu *pmu)
> +{
> + struct attribute **attr_iter = pmu->events_attr_group.attrs;
> +
> + for (; *attr_iter; attr_iter++)
> + kfree((*attr_iter)->name);
> +
> + kfree(pmu->events_attr_group.attrs);
> + kfree(pmu->xe_attr);
> +
> + pmu->events_attr_group.attrs = NULL;
> + pmu->xe_attr = NULL;
> +}
> +
> +static int xe_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
> +{
> + struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
> +
> + /* Select the first online CPU as a designated reader. */
> + if (cpumask_empty(&xe_pmu_cpumask))
> + cpumask_set_cpu(cpu, &xe_pmu_cpumask);
> +
> + return 0;
> +}
> +
> +static int xe_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
> +{
> + struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
> + unsigned int target = xe_pmu_target_cpu;
> +
> + /*
> + * Unregistering an instance generates a CPU offline event which we must
> + * ignore to avoid incorrectly modifying the shared xe_pmu_cpumask.
> + */
> + if (pmu->closed)
> + return 0;
> +
> + if (cpumask_test_and_clear_cpu(cpu, &xe_pmu_cpumask)) {
> + target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
> +
> + /* Migrate events if there is a valid target */
> + if (target < nr_cpu_ids) {
> + cpumask_set_cpu(target, &xe_pmu_cpumask);
> + xe_pmu_target_cpu = target;
> + }
> + }
> +
> + if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
> + perf_pmu_migrate_context(&pmu->base, cpu, target);
> + pmu->cpuhp.cpu = target;
> + }
> +
> + return 0;
> +}
> +
> +static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
> +
> +int xe_pmu_init(void)
> +{
> + int ret;
> +
> + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
> + "perf/x86/intel/xe:online",
> + xe_pmu_cpu_online,
> + xe_pmu_cpu_offline);
> + if (ret < 0)
> + pr_notice("Failed to setup cpuhp state for xe PMU! (%d)\n",
> + ret);
> + else
> + cpuhp_slot = ret;
> +
> + return 0;
> +}
> +
> +void xe_pmu_exit(void)
> +{
> + if (cpuhp_slot != CPUHP_INVALID)
> + cpuhp_remove_multi_state(cpuhp_slot);
> +}
> +
> +static int xe_pmu_register_cpuhp_state(struct xe_pmu *pmu)
> +{
> + if (cpuhp_slot == CPUHP_INVALID)
> + return -EINVAL;
> +
> + return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
> +}
> +
> +static void xe_pmu_unregister_cpuhp_state(struct xe_pmu *pmu)
> +{
> + cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
> +}
> +
> +static void xe_pmu_unregister(struct drm_device *device, void *arg)
> +{
> + struct xe_pmu *pmu = arg;
> +
> + if (!pmu->base.event_init)
> + return;
> +
> + /*
> + * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu
> + * ensures all currently executing ones will have exited before we
> + * proceed with unregistration.
> + */
> + pmu->closed = true;
> + synchronize_rcu();
> +
> + xe_pmu_unregister_cpuhp_state(pmu);
> +
> + perf_pmu_unregister(&pmu->base);
> + pmu->base.event_init = NULL;
> + kfree(pmu->base.attr_groups);
> + kfree(pmu->name);
> + free_event_attributes(pmu);
> +}
> +
> +void xe_pmu_register(struct xe_pmu *pmu)
> +{
> + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
> + const struct attribute_group *attr_groups[] = {
> + &pmu->events_attr_group,
> + &xe_pmu_cpumask_attr_group,
> + NULL
> + };
> +
> + int ret = -ENOMEM;
> +
> + spin_lock_init(&pmu->lock);
> + pmu->cpuhp.cpu = -1;
> +
> + pmu->name = kasprintf(GFP_KERNEL,
> + "xe_%s",
> + dev_name(xe->drm.dev));
> + if (pmu->name)
> + /* tools/perf reserves colons as special. */
> + strreplace((char *)pmu->name, ':', '_');
> +
> + if (!pmu->name)
> + goto err;
> +
> + pmu->events_attr_group.name = "events";
> + pmu->events_attr_group.attrs = create_event_attributes(pmu);
> + if (!pmu->events_attr_group.attrs)
> + goto err_name;
> +
> + pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
> + GFP_KERNEL);
> + if (!pmu->base.attr_groups)
> + goto err_attr;
> +
> + pmu->base.module = THIS_MODULE;
> + pmu->base.task_ctx_nr = perf_invalid_context;
> + pmu->base.event_init = xe_pmu_event_init;
> + pmu->base.add = xe_pmu_event_add;
> + pmu->base.del = xe_pmu_event_del;
> + pmu->base.start = xe_pmu_event_start;
> + pmu->base.stop = xe_pmu_event_stop;
> + pmu->base.read = xe_pmu_event_read;
> +
> + ret = perf_pmu_register(&pmu->base, pmu->name, -1);
> + if (ret)
> + goto err_groups;
> +
> + ret = xe_pmu_register_cpuhp_state(pmu);
> + if (ret)
> + goto err_unreg;
> +
> + ret = drmm_add_action_or_reset(&xe->drm, xe_pmu_unregister, pmu);
> + if (ret)
> + goto err_cpuhp;
> +
> + return;
> +
> +err_cpuhp:
> + xe_pmu_unregister_cpuhp_state(pmu);
> +err_unreg:
> + perf_pmu_unregister(&pmu->base);
> +err_groups:
> + kfree(pmu->base.attr_groups);
> +err_attr:
> + pmu->base.event_init = NULL;
> + free_event_attributes(pmu);
> +err_name:
> + kfree(pmu->name);
> +err:
> + drm_notice(&xe->drm, "Failed to register PMU!\n");
> +}
> diff --git a/drivers/gpu/drm/xe/xe_pmu.h b/drivers/gpu/drm/xe/xe_pmu.h
> new file mode 100644
> index 000000000000..d6fca18466f4
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_pmu.h
> @@ -0,0 +1,23 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2023 Intel Corporation
> + */
> +
> +#ifndef _XE_PMU_H_
> +#define _XE_PMU_H_
> +
> +#include "xe_gt_types.h"
> +#include "xe_pmu_types.h"
> +
> +#if IS_ENABLED(CONFIG_PERF_EVENTS)
> +int xe_pmu_init(void);
> +void xe_pmu_exit(void);
> +void xe_pmu_register(struct xe_pmu *pmu);
> +#else
> +static inline int xe_pmu_init(void) { return 0; }
> +static inline void xe_pmu_exit(void) {}
> +static inline void xe_pmu_register(struct xe_pmu *pmu) {}
> +#endif
> +
> +#endif
> +
> diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h
> new file mode 100644
> index 000000000000..d38b24d27cfd
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_pmu_types.h
> @@ -0,0 +1,49 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2023 Intel Corporation
> + */
> +
> +#ifndef _XE_PMU_TYPES_H_
> +#define _XE_PMU_TYPES_H_
> +
> +#include <linux/perf_event.h>
> +#include <linux/spinlock_types.h>
> +#include <uapi/drm/xe_drm.h>
> +
> +#define XE_PMU_MAX_GT 2
> +
> +struct xe_pmu {
> + /**
> + * @cpuhp: Struct used for CPU hotplug handling.
> + */
> + struct {
> + struct hlist_node node;
> + unsigned int cpu;
> + } cpuhp;
> + /**
> + * @base: PMU base.
> + */
> + struct pmu base;
> + /**
> + * @closed: xe is unregistering.
> + */
> + bool closed;
> + /**
> + * @name: Name as registered with perf core.
> + */
> + const char *name;
> + /**
> + * @lock: Lock protecting enable mask and ref count handling.
> + */
> + spinlock_t lock;
> + /**
> + * @events_attr_group: Device events attribute group.
> + */
> + struct attribute_group events_attr_group;
> + /**
> + * @xe_attr: Memory block holding device attributes.
> + */
> + void *xe_attr;
> +};
> +
> +#endif
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v4 5/8] drm/xe/guc: Add PMU counter for total active ticks
2023-12-22 7:45 [PATCH v4 0/8] Engine busyness Riana Tauro
` (3 preceding siblings ...)
2023-12-22 7:45 ` [PATCH v4 4/8] drm/xe/pmu: Enable PMU interface and add engine busyness counter Riana Tauro
@ 2023-12-22 7:45 ` Riana Tauro
2023-12-22 20:03 ` Belgaumkar, Vinay
2024-01-03 6:54 ` Aravind Iddamsetty
2023-12-22 7:46 ` [PATCH v4 6/8] drm/xe/guc: Expose engine busyness only for supported GuC version Riana Tauro
` (2 subsequent siblings)
7 siblings, 2 replies; 23+ messages in thread
From: Riana Tauro @ 2023-12-22 7:45 UTC (permalink / raw)
To: intel-xe
GuC provides engine busyness ticks as a 64-bit counter which counts
clock ticks. These counters are maintained in a
shared memory buffer and internally updated on a continuous basis.
GuC also provides a periodically updated count of the total ticks that the
GT has been active for (GuC loaded and running).
This counter is exposed to the user such that busyness can
be calculated as a percentage using
busyness % = (engine active ticks/total active ticks) * 100.
This patch provides a pmu counter for total active ticks.
This is listed by perf tool as
sudo ./perf list
xe_0000_03_00.0/total-active-ticks-gt0/ [Kernel PMU event]
and can be read using
sudo ./perf stat -e xe_0000_03_00.0/total-active-ticks-gt0/ -I 1000
time counts unit events
1.001332764 58942964 xe_0000_03_00.0/total-active-ticks-gt0/
2.011421147 21191869 xe_0000_03_00.0/total-active-ticks-gt0/
3.013223865 19269012 xe_0000_03_00.0/total-active-ticks-gt0/
v2: change commit message and comment for
total active ticks (Umesh, Tvrtko)
Co-developed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Riana Tauro <riana.tauro@intel.com>
---
drivers/gpu/drm/xe/xe_gt.c | 11 +++
drivers/gpu/drm/xe/xe_gt.h | 2 +-
drivers/gpu/drm/xe/xe_guc_engine_busyness.c | 72 ++++++++++++++++----
drivers/gpu/drm/xe/xe_guc_engine_busyness.h | 1 +
drivers/gpu/drm/xe/xe_pmu.c | 74 +++++++++++++++++++--
include/uapi/drm/xe_drm.h | 23 ++++++-
6 files changed, 162 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 58daadc00363..5825471a3422 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -789,3 +789,14 @@ u64 xe_gt_engine_busy_ticks(struct xe_gt *gt, struct xe_hw_engine *hwe)
{
return xe_guc_engine_busyness_ticks(>->uc.guc, hwe);
}
+
+/**
+ * xe_gt_total_active_ticks - Return total active ticks
+ * @gt: GT structure
+ *
+ * Returns total active ticks that the GT was active for.
+ */
+u64 xe_gt_total_active_ticks(struct xe_gt *gt)
+{
+ return xe_guc_engine_busyness_active_ticks(>->uc.guc);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 4303ec48c404..9bac85cdf609 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -43,7 +43,7 @@ void xe_gt_reset_async(struct xe_gt *gt);
void xe_gt_sanitize(struct xe_gt *gt);
u64 xe_gt_engine_busy_ticks(struct xe_gt *gt, struct xe_hw_engine *hwe);
-
+u64 xe_gt_total_active_ticks(struct xe_gt *gt);
/**
* xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
* first that matches the same reset domain as @class
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
index af7b6e768751..24e72555647a 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
@@ -20,53 +20,83 @@
* timer internal to GuC. The update rate is guaranteed to be at least 2Hz (but with
* a caveat that is not real time, best effort only).
*
+ * In addition to the engine busyness ticks, there is also a total time count which
+ * is a free running GT timestamp counter.
+ *
+ * Note that counters should be used as ratios of each other for calculating a
+ * percentage.
+ *
* engine busyness ticks (ticks_engine) : clock ticks for which engine was active
+ * total active ticks (ticks_gt) : total clock ticks
+ *
+ * engine busyness % = (ticks_engine / ticks_gt) * 100
*/
static void guc_engine_busyness_usage_map(struct xe_guc *guc,
struct xe_hw_engine *hwe,
- struct iosys_map *engine_map)
+ struct iosys_map *engine_map,
+ struct iosys_map *global_map)
{
struct iosys_map *map;
size_t offset;
u32 instance;
u8 guc_class;
- guc_class = xe_engine_class_to_guc_class(hwe->class);
- instance = hwe->logical_instance;
+ if (hwe) {
+ guc_class = xe_engine_class_to_guc_class(hwe->class);
+ instance = hwe->logical_instance;
+ }
map = &guc->busy.bo->vmap;
- offset = offsetof(struct guc_engine_observation_data,
- engine_data[guc_class][instance]);
+ if (hwe) {
+ offset = offsetof(struct guc_engine_observation_data,
+ engine_data[guc_class][instance]);
+ *engine_map = IOSYS_MAP_INIT_OFFSET(map, offset);
+ }
- *engine_map = IOSYS_MAP_INIT_OFFSET(map, offset);
+ *global_map = IOSYS_MAP_INIT_OFFSET(map, 0);
}
static void guc_engine_busyness_get_usage(struct xe_guc *guc,
struct xe_hw_engine *hwe,
- u64 *ticks_engine)
+ u64 *ticks_engine,
+ u64 *ticks_gt)
{
- struct iosys_map engine_map;
- u64 engine_ticks = 0;
+ struct iosys_map engine_map, global_map;
+ u64 engine_ticks = 0, gt_ticks = 0;
int i = 0;
- guc_engine_busyness_usage_map(guc, hwe, &engine_map);
+ guc_engine_busyness_usage_map(guc, hwe, &engine_map, &global_map);
#define read_engine_usage(map_, field_) \
iosys_map_rd_field(map_, 0, struct guc_engine_data, field_)
+#define read_global_field(map_, field_) \
+ iosys_map_rd_field(map_, 0, struct guc_engine_observation_data, field_)
+
do {
- engine_ticks = read_engine_usage(&engine_map, total_execution_ticks);
+ if (hwe)
+ engine_ticks = read_engine_usage(&engine_map, total_execution_ticks);
+
+ gt_ticks = read_global_field(&global_map, gt_timestamp);
- if (read_engine_usage(&engine_map, total_execution_ticks) == engine_ticks)
+ if (hwe && read_engine_usage(&engine_map, total_execution_ticks) != engine_ticks)
+ continue;
+
+ if (read_global_field(&global_map, gt_timestamp) == gt_ticks)
break;
+
} while (++i < 6);
#undef read_engine_usage
+#undef read_global_field
if (ticks_engine)
*ticks_engine = engine_ticks;
+
+ if (ticks_gt)
+ *ticks_gt = gt_ticks;
}
static void guc_engine_busyness_enable_stats(struct xe_guc *guc)
@@ -92,6 +122,22 @@ static void guc_engine_busyness_fini(struct drm_device *drm, void *arg)
xe_bo_unpin_map_no_vm(guc->busy.bo);
}
+/*
+ * xe_guc_engine_busyness_active_ticks - Gets the total active ticks
+ * @guc: The GuC object
+ *
+ * Returns total active ticks that the GT has been running for
+ * (GuC loaded and running).
+ */
+u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc)
+{
+ u64 ticks_gt;
+
+ guc_engine_busyness_get_usage(guc, NULL, NULL, &ticks_gt);
+
+ return ticks_gt;
+}
+
/*
* xe_guc_engine_busyness_ticks - Gets current accumulated
* engine busyness ticks
@@ -104,7 +150,7 @@ u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
{
u64 ticks_engine;
- guc_engine_busyness_get_usage(guc, hwe, &ticks_engine);
+ guc_engine_busyness_get_usage(guc, hwe, &ticks_engine, NULL);
return ticks_engine;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
index d70f06209896..57325910ebc4 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
@@ -12,6 +12,7 @@ struct xe_hw_engine;
struct xe_guc;
int xe_guc_engine_busyness_init(struct xe_guc *guc);
+u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc);
u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
#endif
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index 371ca6d7e215..c2be157a6f5d 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -51,6 +51,20 @@ static bool is_engine_event(struct perf_event *event)
return config_counter(event->attr.config) < __DRM_XE_PMU_OTHER(0, 0);
}
+static int
+config_status(struct xe_device *xe, u64 config)
+{
+ unsigned int gt_id = config_gt_id(config);
+
+ if (gt_id >= XE_PMU_MAX_GT)
+ return -ENOENT;
+
+ if (config_counter(config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0))
+ return 0;
+
+ return -ENOENT;
+}
+
static int engine_event_status(struct xe_hw_engine *hwe,
enum drm_xe_pmu_engine_sample sample)
{
@@ -113,11 +127,13 @@ static int xe_pmu_event_init(struct perf_event *event)
if (!cpumask_test_cpu(event->cpu, &xe_pmu_cpumask))
return -EINVAL;
- if (is_engine_event(event)) {
+ if (is_engine_event(event))
ret = engine_event_init(event);
- if (ret)
- return ret;
- }
+ else
+ ret = config_status(xe, event->attr.config);
+
+ if (ret)
+ return ret;
if (!event->parent) {
drm_dev_get(&xe->drm);
@@ -131,7 +147,8 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
{
struct xe_device *xe =
container_of(event->pmu, typeof(*xe), pmu.base);
- const unsigned int gt_id = config_gt_id(event->attr.config);
+ u64 config = event->attr.config;
+ const unsigned int gt_id = config_gt_id(config);
struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
u64 val;
@@ -147,6 +164,11 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
val = xe_gt_engine_busy_ticks(gt, hwe);
else
drm_warn(&xe->drm, "unknown pmu engine event\n");
+ } else {
+ if (config_counter(config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0))
+ val = xe_gt_total_active_ticks(gt);
+ else
+ drm_warn(&xe->drm, "unknown pmu event\n");
}
return val;
@@ -256,6 +278,12 @@ static const struct attribute_group xe_pmu_cpumask_attr_group = {
.attrs = xe_cpumask_attrs,
};
+#define __event(__counter, __name) \
+{ \
+ .counter = (__counter), \
+ .name = (__name), \
+}
+
#define __engine_event(__sample, __name) \
{ \
.sample = (__sample), \
@@ -293,6 +321,23 @@ create_event_attributes(struct xe_pmu *pmu)
__engine_event(DRM_XE_PMU_SAMPLE_BUSY_TICKS, "busy-ticks"),
};
+ static const struct {
+ unsigned int counter;
+ const char *name;
+ } events[] = {
+ __event(0, "total-active-ticks"),
+ };
+
+ /* Count how many counters we will be exposing. */
+ for_each_gt(gt, xe, j) {
+ for (i = 0; i < ARRAY_SIZE(events); i++) {
+ u64 config = __DRM_XE_PMU_OTHER(j, events[i].counter);
+
+ if (!config_status(xe, config))
+ count++;
+ }
+ }
+
for_each_gt(gt, xe, j) {
for_each_hw_engine(hwe, gt, id) {
for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
@@ -315,6 +360,25 @@ create_event_attributes(struct xe_pmu *pmu)
xe_iter = xe_attr;
attr_iter = attr;
+ /* Initialize supported non-engine counters */
+ for_each_gt(gt, xe, j) {
+ for (i = 0; i < ARRAY_SIZE(events); i++) {
+ u64 config = __DRM_XE_PMU_OTHER(j, events[i].counter);
+ char *str;
+
+ if (config_status(xe, config))
+ continue;
+
+ str = kasprintf(GFP_KERNEL, "%s-gt%u",
+ events[i].name, j);
+ if (!str)
+ goto err;
+
+ *attr_iter++ = &xe_iter->attr.attr;
+ xe_iter = add_xe_attr(xe_iter, str, config);
+ }
+ }
+
/* Initialize supported engine counters */
for_each_gt(gt, xe, j) {
for_each_hw_engine(hwe, gt, id) {
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index f8456cda5cda..3134930b0160 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1341,12 +1341,29 @@ struct drm_xe_wait_user_fence {
};
/**
- * DOC: XE PMU event config IDs
+ * DOC: XE PMU Event Config ID's
*
- * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed in xe_drm.h
+ * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed here
* in 'struct perf_event_attr' as part of perf_event_open syscall to read a
* particular event.
*
+ * For example to open the DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0):
+ *
+ * .. code-block:: C
+ *
+ * struct perf_event_attr attr;
+ * long long count;
+ * int cpu = 0;
+ * int fd;
+ *
+ * memset(&attr, 0, sizeof(struct perf_event_attr));
+ * attr.type = type; // eg: /sys/bus/event_source/devices/xe_0000_03_00.0/type
+ * attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
+ * attr.use_clockid = 1;
+ * attr.clockid = CLOCK_MONOTONIC;
+ * attr.config = DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0);
+ *
+ * fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
*/
/**
@@ -1381,6 +1398,8 @@ enum drm_xe_pmu_engine_sample {
__DRM_XE_PMU_GT_EVENT(gt, __DRM_XE_PMU_ENGINE(class, instance, \
DRM_XE_PMU_SAMPLE_BUSY_TICKS))
+#define DRM_XE_PMU_TOTAL_ACTIVE_TICKS(gt) __DRM_XE_PMU_OTHER(gt, 0)
+
#if defined(__cplusplus)
}
#endif
--
2.40.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH v4 5/8] drm/xe/guc: Add PMU counter for total active ticks
2023-12-22 7:45 ` [PATCH v4 5/8] drm/xe/guc: Add PMU counter for total active ticks Riana Tauro
@ 2023-12-22 20:03 ` Belgaumkar, Vinay
2024-01-03 6:54 ` Aravind Iddamsetty
1 sibling, 0 replies; 23+ messages in thread
From: Belgaumkar, Vinay @ 2023-12-22 20:03 UTC (permalink / raw)
To: Riana Tauro, intel-xe
[-- Attachment #1: Type: text/plain, Size: 13433 bytes --]
On 12/21/2023 11:45 PM, Riana Tauro wrote:
> GuC provides engine busyness ticks as a 64 bit counter which count
> as clock ticks. These counters are maintained in a
> shared memory buffer and internally updated on a continuous basis.
>
> GuC also provides a periodically total active ticks that GT has been
> active for (GuC loaded and running).
> This counter is exposed to the user such that busyness can
> be calculated as a percentage using
>
> busyness % = (engine active ticks/total active ticks) * 100.
>
> This patch provides a pmu counter for total active ticks.
>
> This is listed by perf tool as
>
> sudo ./perf list
> xe_0000_03_00.0/total-active-ticks-gt0/ [Kernel PMU event]
>
> and can be read using
>
> sudo ./perf stat -e xe_0000_03_00.0/total-active-ticks-gt0/ -I 1000
> time counts unit events
> 1.001332764 58942964 xe_0000_03_00.0/total-active-ticks-gt0/
> 2.011421147 21191869 xe_0000_03_00.0/total-active-ticks-gt0/
> 3.013223865 19269012 xe_0000_03_00.0/total-active-ticks-gt0/
>
> v2: change commit message and comment for
> total active ticks (Umesh, Tvrtko)
>
> Co-developed-by: Umesh Nerlige Ramappa<umesh.nerlige.ramappa@intel.com>
> Signed-off-by: Umesh Nerlige Ramappa<umesh.nerlige.ramappa@intel.com>
> Signed-off-by: Riana Tauro<riana.tauro@intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt.c | 11 +++
> drivers/gpu/drm/xe/xe_gt.h | 2 +-
> drivers/gpu/drm/xe/xe_guc_engine_busyness.c | 72 ++++++++++++++++----
> drivers/gpu/drm/xe/xe_guc_engine_busyness.h | 1 +
> drivers/gpu/drm/xe/xe_pmu.c | 74 +++++++++++++++++++--
> include/uapi/drm/xe_drm.h | 23 ++++++-
> 6 files changed, 162 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index 58daadc00363..5825471a3422 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -789,3 +789,14 @@ u64 xe_gt_engine_busy_ticks(struct xe_gt *gt, struct xe_hw_engine *hwe)
> {
> return xe_guc_engine_busyness_ticks(>->uc.guc, hwe);
> }
> +
> +/**
> + * xe_gt_total_active_ticks - Return total active ticks
> + * @gt: GT structure
> + *
> + * Returns total active ticks that the GT was active for.
> + */
> +u64 xe_gt_total_active_ticks(struct xe_gt *gt)
> +{
> + return xe_guc_engine_busyness_active_ticks(>->uc.guc);
> +}
> diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
> index 4303ec48c404..9bac85cdf609 100644
> --- a/drivers/gpu/drm/xe/xe_gt.h
> +++ b/drivers/gpu/drm/xe/xe_gt.h
> @@ -43,7 +43,7 @@ void xe_gt_reset_async(struct xe_gt *gt);
> void xe_gt_sanitize(struct xe_gt *gt);
>
> u64 xe_gt_engine_busy_ticks(struct xe_gt *gt, struct xe_hw_engine *hwe);
> -
> +u64 xe_gt_total_active_ticks(struct xe_gt *gt);
> /**
> * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
> * first that matches the same reset domain as @class
> diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
> index af7b6e768751..24e72555647a 100644
> --- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
> +++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
> @@ -20,53 +20,83 @@
> * timer internal to GuC. The update rate is guaranteed to be at least 2Hz (but with
> * a caveat that is not real time, best effort only).
> *
> + * In addition to the engine busyness ticks, there is also a total time count which
> + * is a free running GT timestamp counter.
> + *
> + * Note that counters should be used as ratios of each other for calculating a
> + * percentage.
> + *
> * engine busyness ticks (ticks_engine) : clock ticks for which engine was active
> + * total active ticks (ticks_gt) : total clock ticks
> + *
> + * engine busyness % = (ticks_engine / ticks_gt) * 100
> */
>
> static void guc_engine_busyness_usage_map(struct xe_guc *guc,
> struct xe_hw_engine *hwe,
> - struct iosys_map *engine_map)
> + struct iosys_map *engine_map,
> + struct iosys_map *global_map)
> {
> struct iosys_map *map;
> size_t offset;
> u32 instance;
> u8 guc_class;
>
> - guc_class = xe_engine_class_to_guc_class(hwe->class);
> - instance = hwe->logical_instance;
> + if (hwe) {
> + guc_class = xe_engine_class_to_guc_class(hwe->class);
> + instance = hwe->logical_instance;
> + }
>
> map = &guc->busy.bo->vmap;
>
> - offset = offsetof(struct guc_engine_observation_data,
> - engine_data[guc_class][instance]);
> + if (hwe) {
> + offset = offsetof(struct guc_engine_observation_data,
> + engine_data[guc_class][instance]);
> + *engine_map = IOSYS_MAP_INIT_OFFSET(map, offset);
> + }
>
> - *engine_map = IOSYS_MAP_INIT_OFFSET(map, offset);
> + *global_map = IOSYS_MAP_INIT_OFFSET(map, 0);
> }
>
> static void guc_engine_busyness_get_usage(struct xe_guc *guc,
> struct xe_hw_engine *hwe,
> - u64 *ticks_engine)
> + u64 *ticks_engine,
> + u64 *ticks_gt)
> {
> - struct iosys_map engine_map;
> - u64 engine_ticks = 0;
> + struct iosys_map engine_map, global_map;
> + u64 engine_ticks = 0, gt_ticks = 0;
> int i = 0;
>
> - guc_engine_busyness_usage_map(guc, hwe, &engine_map);
> + guc_engine_busyness_usage_map(guc, hwe, &engine_map, &global_map);
>
> #define read_engine_usage(map_, field_) \
> iosys_map_rd_field(map_, 0, struct guc_engine_data, field_)
>
> +#define read_global_field(map_, field_) \
> + iosys_map_rd_field(map_, 0, struct guc_engine_observation_data, field_)
> +
> do {
> - engine_ticks = read_engine_usage(&engine_map, total_execution_ticks);
> + if (hwe)
> + engine_ticks = read_engine_usage(&engine_map, total_execution_ticks);
> +
> + gt_ticks = read_global_field(&global_map, gt_timestamp);
>
> - if (read_engine_usage(&engine_map, total_execution_ticks) == engine_ticks)
> + if (hwe && read_engine_usage(&engine_map, total_execution_ticks) != engine_ticks)
> + continue;
> +
> + if (read_global_field(&global_map, gt_timestamp) == gt_ticks)
> break;
> +
> } while (++i < 6);
>
> #undef read_engine_usage
> +#undef read_global_field
>
> if (ticks_engine)
> *ticks_engine = engine_ticks;
> +
> + if (ticks_gt)
> + *ticks_gt = gt_ticks;
> }
>
> static void guc_engine_busyness_enable_stats(struct xe_guc *guc)
> @@ -92,6 +122,22 @@ static void guc_engine_busyness_fini(struct drm_device *drm, void *arg)
> xe_bo_unpin_map_no_vm(guc->busy.bo);
> }
>
> +/*
> + * xe_guc_engine_busyness_active_ticks - Gets the total active ticks
> + * @guc: The GuC object
> + *
> + * Returns total active ticks that the GT has been running for
> + * (GuC loaded and running).
> + */
> +u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc)
nit: Could we have the word gt somewhere in this function name so it's clear
what we are asking for? e.g. xe_guc_engine_busyness_active_gt_ticks?
Other than that,
Reviewed-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> +{
> + u64 ticks_gt;
> +
> + guc_engine_busyness_get_usage(guc, NULL, NULL, &ticks_gt);
> +
> + return ticks_gt;
> +}
> +
> /*
> * xe_guc_engine_busyness_ticks - Gets current accumulated
> * engine busyness ticks
> @@ -104,7 +150,7 @@ u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
> {
> u64 ticks_engine;
>
> - guc_engine_busyness_get_usage(guc, hwe, &ticks_engine);
> + guc_engine_busyness_get_usage(guc, hwe, &ticks_engine, NULL);
>
> return ticks_engine;
> }
> diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
> index d70f06209896..57325910ebc4 100644
> --- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
> +++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
> @@ -12,6 +12,7 @@ struct xe_hw_engine;
> struct xe_guc;
>
> int xe_guc_engine_busyness_init(struct xe_guc *guc);
> +u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc);
> u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
>
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
> index 371ca6d7e215..c2be157a6f5d 100644
> --- a/drivers/gpu/drm/xe/xe_pmu.c
> +++ b/drivers/gpu/drm/xe/xe_pmu.c
> @@ -51,6 +51,20 @@ static bool is_engine_event(struct perf_event *event)
> return config_counter(event->attr.config) < __DRM_XE_PMU_OTHER(0, 0);
> }
>
> +static int
> +config_status(struct xe_device *xe, u64 config)
> +{
> + unsigned int gt_id = config_gt_id(config);
> +
> + if (gt_id >= XE_PMU_MAX_GT)
> + return -ENOENT;
> +
> + if (config_counter(config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0))
> + return 0;
> +
> + return -ENOENT;
> +}
> +
> static int engine_event_status(struct xe_hw_engine *hwe,
> enum drm_xe_pmu_engine_sample sample)
> {
> @@ -113,11 +127,13 @@ static int xe_pmu_event_init(struct perf_event *event)
> if (!cpumask_test_cpu(event->cpu, &xe_pmu_cpumask))
> return -EINVAL;
>
> - if (is_engine_event(event)) {
> + if (is_engine_event(event))
> ret = engine_event_init(event);
> - if (ret)
> - return ret;
> - }
> + else
> + ret = config_status(xe, event->attr.config);
> +
> + if (ret)
> + return ret;
>
> if (!event->parent) {
> drm_dev_get(&xe->drm);
> @@ -131,7 +147,8 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
> {
> struct xe_device *xe =
> container_of(event->pmu, typeof(*xe), pmu.base);
> - const unsigned int gt_id = config_gt_id(event->attr.config);
> + u64 config = event->attr.config;
> + const unsigned int gt_id = config_gt_id(config);
> struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
> u64 val;
>
> @@ -147,6 +164,11 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
> val = xe_gt_engine_busy_ticks(gt, hwe);
> else
> drm_warn(&xe->drm, "unknown pmu engine event\n");
> + } else {
> + if (config_counter(config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0))
> + val = xe_gt_total_active_ticks(gt);
> + else
> + drm_warn(&xe->drm, "unknown pmu event\n");
> }
>
> return val;
> @@ -256,6 +278,12 @@ static const struct attribute_group xe_pmu_cpumask_attr_group = {
> .attrs = xe_cpumask_attrs,
> };
>
> +#define __event(__counter, __name) \
> +{ \
> + .counter = (__counter), \
> + .name = (__name), \
> +}
> +
> #define __engine_event(__sample, __name) \
> { \
> .sample = (__sample), \
> @@ -293,6 +321,23 @@ create_event_attributes(struct xe_pmu *pmu)
> __engine_event(DRM_XE_PMU_SAMPLE_BUSY_TICKS, "busy-ticks"),
> };
>
> + static const struct {
> + unsigned int counter;
> + const char *name;
> + } events[] = {
> + __event(0, "total-active-ticks"),
> + };
> +
> + /* Count how many counters we will be exposing. */
> + for_each_gt(gt, xe, j) {
> + for (i = 0; i < ARRAY_SIZE(events); i++) {
> + u64 config = __DRM_XE_PMU_OTHER(j, events[i].counter);
> +
> + if (!config_status(xe, config))
> + count++;
> + }
> + }
> +
> for_each_gt(gt, xe, j) {
> for_each_hw_engine(hwe, gt, id) {
> for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
> @@ -315,6 +360,25 @@ create_event_attributes(struct xe_pmu *pmu)
> xe_iter = xe_attr;
> attr_iter = attr;
>
> + /* Initialize supported non-engine counters */
> + for_each_gt(gt, xe, j) {
> + for (i = 0; i < ARRAY_SIZE(events); i++) {
> + u64 config = __DRM_XE_PMU_OTHER(j, events[i].counter);
> + char *str;
> +
> + if (config_status(xe, config))
> + continue;
> +
> + str = kasprintf(GFP_KERNEL, "%s-gt%u",
> + events[i].name, j);
> + if (!str)
> + goto err;
> +
> + *attr_iter++ = &xe_iter->attr.attr;
> + xe_iter = add_xe_attr(xe_iter, str, config);
> + }
> + }
> +
> /* Initialize supported engine counters */
> for_each_gt(gt, xe, j) {
> for_each_hw_engine(hwe, gt, id) {
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index f8456cda5cda..3134930b0160 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -1341,12 +1341,29 @@ struct drm_xe_wait_user_fence {
> };
>
> /**
> - * DOC: XE PMU event config IDs
> + * DOC: XE PMU Event Config ID's
> *
> - * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed in xe_drm.h
> + * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed here
> * in 'struct perf_event_attr' as part of perf_event_open syscall to read a
> * particular event.
> *
> + * For example to open the DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0):
> + *
> + * .. code-block:: C
> + *
> + * struct perf_event_attr attr;
> + * long long count;
> + * int cpu = 0;
> + * int fd;
> + *
> + * memset(&attr, 0, sizeof(struct perf_event_attr));
> + * attr.type = type; // eg: /sys/bus/event_source/devices/xe_0000_03_00.0/type
> + * attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
> + * attr.use_clockid = 1;
> + * attr.clockid = CLOCK_MONOTONIC;
> + * attr.config = DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0);
> + *
> + * fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
> */
>
> /**
> @@ -1381,6 +1398,8 @@ enum drm_xe_pmu_engine_sample {
> __DRM_XE_PMU_GT_EVENT(gt, __DRM_XE_PMU_ENGINE(class, instance, \
> DRM_XE_PMU_SAMPLE_BUSY_TICKS))
>
> +#define DRM_XE_PMU_TOTAL_ACTIVE_TICKS(gt) __DRM_XE_PMU_OTHER(gt, 0)
> +
> #if defined(__cplusplus)
> }
> #endif
[-- Attachment #2: Type: text/html, Size: 13769 bytes --]
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCH v4 5/8] drm/xe/guc: Add PMU counter for total active ticks
2023-12-22 7:45 ` [PATCH v4 5/8] drm/xe/guc: Add PMU counter for total active ticks Riana Tauro
2023-12-22 20:03 ` Belgaumkar, Vinay
@ 2024-01-03 6:54 ` Aravind Iddamsetty
1 sibling, 0 replies; 23+ messages in thread
From: Aravind Iddamsetty @ 2024-01-03 6:54 UTC (permalink / raw)
To: Riana Tauro, intel-xe
On 12/22/23 13:15, Riana Tauro wrote:
> GuC provides engine busyness ticks as a 64 bit counter which count
> as clock ticks. These counters are maintained in a
> shared memory buffer and internally updated on a continuous basis.
>
> GuC also provides a periodically total active ticks that GT has been
> active for (GuC loaded and running).
> This counter is exposed to the user such that busyness can
> be calculated as a percentage using
>
> busyness % = (engine active ticks/total active ticks) * 100.
>
> This patch provides a pmu counter for total active ticks.
>
> This is listed by perf tool as
>
> sudo ./perf list
> xe_0000_03_00.0/total-active-ticks-gt0/ [Kernel PMU event]
>
> and can be read using
>
> sudo ./perf stat -e xe_0000_03_00.0/total-active-ticks-gt0/ -I 1000
> time counts unit events
> 1.001332764 58942964 xe_0000_03_00.0/total-active-ticks-gt0/
> 2.011421147 21191869 xe_0000_03_00.0/total-active-ticks-gt0/
> 3.013223865 19269012 xe_0000_03_00.0/total-active-ticks-gt0/
>
> v2: change commit message and comment for
> total active ticks (Umesh, Tvrtko)
>
> Co-developed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> Signed-off-by: Riana Tauro <riana.tauro@intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt.c | 11 +++
> drivers/gpu/drm/xe/xe_gt.h | 2 +-
> drivers/gpu/drm/xe/xe_guc_engine_busyness.c | 72 ++++++++++++++++----
> drivers/gpu/drm/xe/xe_guc_engine_busyness.h | 1 +
> drivers/gpu/drm/xe/xe_pmu.c | 74 +++++++++++++++++++--
> include/uapi/drm/xe_drm.h | 23 ++++++-
> 6 files changed, 162 insertions(+), 21 deletions(-)
>
<snip>
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index f8456cda5cda..3134930b0160 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -1341,12 +1341,29 @@ struct drm_xe_wait_user_fence {
> };
>
> /**
> - * DOC: XE PMU event config IDs
> + * DOC: XE PMU Event Config ID's
> *
> - * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed in xe_drm.h
the reason for mentioning xe_drm.h is that the defines will not be included in the generated documentation
> + * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed here
> * in 'struct perf_event_attr' as part of perf_event_open syscall to read a
> * particular event.
> *
> + * For example to open the DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0):
> + *
> + * .. code-block:: C
> + *
> + * struct perf_event_attr attr;
> + * long long count;
> + * int cpu = 0;
> + * int fd;
> + *
> + * memset(&attr, 0, sizeof(struct perf_event_attr));
> + * attr.type = type; // eg: /sys/bus/event_source/devices/xe_0000_03_00.0/type
> + * attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
> + * attr.use_clockid = 1;
> + * attr.clockid = CLOCK_MONOTONIC;
> + * attr.config = DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0);
> + *
> + * fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
> */
>
> /**
> @@ -1381,6 +1398,8 @@ enum drm_xe_pmu_engine_sample {
> __DRM_XE_PMU_GT_EVENT(gt, __DRM_XE_PMU_ENGINE(class, instance, \
> DRM_XE_PMU_SAMPLE_BUSY_TICKS))
>
> +#define DRM_XE_PMU_TOTAL_ACTIVE_TICKS(gt) __DRM_XE_PMU_OTHER(gt, 0)
> +
> #if defined(__cplusplus)
> }
> #endif
Thanks,
Aravind.
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v4 6/8] drm/xe/guc: Expose engine busyness only for supported GuC version
2023-12-22 7:45 [PATCH v4 0/8] Engine busyness Riana Tauro
` (4 preceding siblings ...)
2023-12-22 7:45 ` [PATCH v4 5/8] drm/xe/guc: Add PMU counter for total active ticks Riana Tauro
@ 2023-12-22 7:46 ` Riana Tauro
2024-01-18 6:13 ` Nilawar, Badal
2023-12-22 7:46 ` [PATCH v4 7/8] drm/xe/guc: Dynamically enable/disable engine busyness stats Riana Tauro
2023-12-22 7:46 ` [PATCH v4 8/8] drm/xe/guc: Handle runtime suspend issues for engine busyness Riana Tauro
7 siblings, 1 reply; 23+ messages in thread
From: Riana Tauro @ 2023-12-22 7:46 UTC (permalink / raw)
To: intel-xe
GuC version numbers are 8 bits only, so convert them to a 32-bit 8.8.8
value to allow version comparisons. Use the compatibility version
for the same.
Engine busyness is supported only on GuC versions >= 70.11.1.
Allow enabling/reading engine busyness only on supported
GuC versions. Warn once if not supported.
v2: rebase
fix guc comparison error (Matthew Brost)
add a macro for guc version comparison
v3: do not show pmu counters if guc engine busyness
is not supported
v4: add version check comment only in the check function
remove it otherwise (Umesh)
Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
drivers/gpu/drm/xe/xe_gt.c | 11 ++++++
drivers/gpu/drm/xe/xe_gt.h | 1 +
drivers/gpu/drm/xe/xe_guc_engine_busyness.c | 37 +++++++++++++++++++++
drivers/gpu/drm/xe/xe_guc_engine_busyness.h | 2 +-
drivers/gpu/drm/xe/xe_pmu.c | 12 +++++--
5 files changed, 59 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 5825471a3422..a48cceaa7750 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -800,3 +800,14 @@ u64 xe_gt_total_active_ticks(struct xe_gt *gt)
{
return xe_guc_engine_busyness_active_ticks(>->uc.guc);
}
+
+/**
+ * xe_gt_engine_busyness_supported - Checks support for engine busyness
+ * @gt: GT structure
+ *
+ * Returns true if engine busyness is supported, false otherwise.
+ */
+bool xe_gt_engine_busyness_supported(struct xe_gt *gt)
+{
+ return xe_guc_engine_busyness_supported(>->uc.guc);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 9bac85cdf609..bef99eb2fed2 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -42,6 +42,7 @@ int xe_gt_resume(struct xe_gt *gt);
void xe_gt_reset_async(struct xe_gt *gt);
void xe_gt_sanitize(struct xe_gt *gt);
+bool xe_gt_engine_busyness_supported(struct xe_gt *gt);
u64 xe_gt_engine_busy_ticks(struct xe_gt *gt, struct xe_hw_engine *hwe);
u64 xe_gt_total_active_ticks(struct xe_gt *gt);
/**
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
index 24e72555647a..2dd06563d0ad 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
@@ -32,6 +32,9 @@
* engine busyness % = (ticks_engine / ticks_gt) * 100
*/
+/* GuC version number components are only 8-bit, so converting to a 32bit 8.8.8 */
+#define GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))
+
static void guc_engine_busyness_usage_map(struct xe_guc *guc,
struct xe_hw_engine *hwe,
struct iosys_map *engine_map,
@@ -110,6 +113,9 @@ static void guc_engine_busyness_enable_stats(struct xe_guc *guc)
struct xe_device *xe = guc_to_xe(guc);
int ret;
+ if (!xe_guc_engine_busyness_supported(guc))
+ return;
+
ret = xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
if (ret)
drm_err(&xe->drm, "Failed to enable usage stats %pe", ERR_PTR(ret));
@@ -122,6 +128,28 @@ static void guc_engine_busyness_fini(struct drm_device *drm, void *arg)
xe_bo_unpin_map_no_vm(guc->busy.bo);
}
+/*
+ * xe_guc_engine_busyness_supported - check if engine busyness is supported
+ * @guc: The GuC object
+ *
+ * Engine busyness is supported only above guc 70.11.1
+ *
+ * Returns true if supported, false otherwise
+ */
+bool xe_guc_engine_busyness_supported(struct xe_guc *guc)
+{
+ struct xe_uc_fw *uc_fw = &guc->fw;
+ struct xe_uc_fw_version *version = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
+
+ if (GUC_VER(version->major, version->minor, version->patch) >= GUC_VER(1, 3, 1))
+ return true;
+
+ drm_WARN_ON_ONCE(&guc_to_xe(guc)->drm,
+ "Engine busyness supported from 70.11.1 GuC version\n");
+
+ return false;
+}
+
/*
* xe_guc_engine_busyness_active_ticks - Gets the total active ticks
* @guc: The GuC object
@@ -133,6 +161,9 @@ u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc)
{
u64 ticks_gt;
+ if (!xe_guc_engine_busyness_supported(guc))
+ return 0;
+
guc_engine_busyness_get_usage(guc, NULL, NULL, &ticks_gt);
return ticks_gt;
@@ -150,6 +181,9 @@ u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
{
u64 ticks_engine;
+ if (!xe_guc_engine_busyness_supported(guc))
+ return 0;
+
guc_engine_busyness_get_usage(guc, hwe, &ticks_engine, NULL);
return ticks_engine;
@@ -173,6 +207,9 @@ int xe_guc_engine_busyness_init(struct xe_guc *guc)
u32 size;
int err;
+ if (!xe_guc_engine_busyness_supported(guc))
+ return 0;
+
/* Initialization already done */
if (guc->busy.bo)
return 0;
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
index 57325910ebc4..e3c74e0236af 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
@@ -14,5 +14,5 @@ struct xe_guc;
int xe_guc_engine_busyness_init(struct xe_guc *guc);
u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc);
u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
-
+bool xe_guc_engine_busyness_supported(struct xe_guc *guc);
#endif
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index c2be157a6f5d..f91652886b67 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -55,14 +55,16 @@ static int
config_status(struct xe_device *xe, u64 config)
{
unsigned int gt_id = config_gt_id(config);
+ struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
if (gt_id >= XE_PMU_MAX_GT)
return -ENOENT;
- if (config_counter(config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0))
- return 0;
+ if (config_counter(config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0) &&
+ !xe_gt_engine_busyness_supported(gt))
+ return -ENOENT;
- return -ENOENT;
+ return 0;
}
static int engine_event_status(struct xe_hw_engine *hwe,
@@ -71,6 +73,10 @@ static int engine_event_status(struct xe_hw_engine *hwe,
if (!hwe)
return -ENODEV;
+ if (sample == DRM_XE_PMU_SAMPLE_BUSY_TICKS &&
+ !xe_gt_engine_busyness_supported(hwe->gt))
+ return -ENOENT;
+
/* Other engine events will be added, XE_ENGINE_SAMPLE_COUNT will be changed */
return (sample >= DRM_XE_PMU_SAMPLE_BUSY_TICKS && sample < XE_ENGINE_SAMPLE_COUNT)
? 0 : -ENOENT;
--
2.40.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH v4 6/8] drm/xe/guc: Expose engine busyness only for supported GuC version
2023-12-22 7:46 ` [PATCH v4 6/8] drm/xe/guc: Expose engine busyness only for supported GuC version Riana Tauro
@ 2024-01-18 6:13 ` Nilawar, Badal
2024-01-19 10:13 ` Riana Tauro
0 siblings, 1 reply; 23+ messages in thread
From: Nilawar, Badal @ 2024-01-18 6:13 UTC (permalink / raw)
To: Riana Tauro, intel-xe
Hi Riana,
On 22-12-2023 13:16, Riana Tauro wrote:
> Guc version numbers are 8 bits only so convert to 32 bit 8.8.8
> to allow version comparisons. Use compatibility version
> for the same.
>
> Engine busyness is supported only on GuC versions >= 70.11.1.
> Allow enabling/reading engine busyness only on supported
> GuC versions. Warn once if not supported.
>
> v2: rebase
> fix guc comparison error (Matthew Brost)
> add a macro for guc version comparison
>
> v3: do not show pmu counters if guc engine busyness
> is not supported
>
> v4: add version check comment only in the check function
> remove it otherwise (Umesh)
>
> Signed-off-by: Riana Tauro <riana.tauro@intel.com>
> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt.c | 11 ++++++
> drivers/gpu/drm/xe/xe_gt.h | 1 +
> drivers/gpu/drm/xe/xe_guc_engine_busyness.c | 37 +++++++++++++++++++++
> drivers/gpu/drm/xe/xe_guc_engine_busyness.h | 2 +-
> drivers/gpu/drm/xe/xe_pmu.c | 12 +++++--
> 5 files changed, 59 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index 5825471a3422..a48cceaa7750 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -800,3 +800,14 @@ u64 xe_gt_total_active_ticks(struct xe_gt *gt)
> {
> return xe_guc_engine_busyness_active_ticks(&gt->uc.guc);
> }
> +
> +/**
> + * xe_gt_engine_busyness_supported - Checks support for engine busyness
> + * @gt: GT structure
> + *
> + * Returns true if engine busyness is supported, false otherwise.
> + */
> +bool xe_gt_engine_busyness_supported(struct xe_gt *gt)
> +{
> + return xe_guc_engine_busyness_supported(&gt->uc.guc);
> +}
> diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
> index 9bac85cdf609..bef99eb2fed2 100644
> --- a/drivers/gpu/drm/xe/xe_gt.h
> +++ b/drivers/gpu/drm/xe/xe_gt.h
> @@ -42,6 +42,7 @@ int xe_gt_resume(struct xe_gt *gt);
> void xe_gt_reset_async(struct xe_gt *gt);
> void xe_gt_sanitize(struct xe_gt *gt);
>
> +bool xe_gt_engine_busyness_supported(struct xe_gt *gt);
> u64 xe_gt_engine_busy_ticks(struct xe_gt *gt, struct xe_hw_engine *hwe);
> u64 xe_gt_total_active_ticks(struct xe_gt *gt);
> /**
> diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
> index 24e72555647a..2dd06563d0ad 100644
> --- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
> +++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
> @@ -32,6 +32,9 @@
> * engine busyness % = (ticks_engine / ticks_gt) * 100
> */
>
> +/* GuC version number components are only 8-bit, so converting to a 32bit 8.8.8 */
> +#define GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))
> +
> static void guc_engine_busyness_usage_map(struct xe_guc *guc,
> struct xe_hw_engine *hwe,
> struct iosys_map *engine_map,
> @@ -110,6 +113,9 @@ static void guc_engine_busyness_enable_stats(struct xe_guc *guc)
> struct xe_device *xe = guc_to_xe(guc);
> int ret;
>
> + if (!xe_guc_engine_busyness_supported(guc))
> + return;
> +
> ret = xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
> if (ret)
> drm_err(&xe->drm, "Failed to enable usage stats %pe", ERR_PTR(ret));
> @@ -122,6 +128,28 @@ static void guc_engine_busyness_fini(struct drm_device *drm, void *arg)
> xe_bo_unpin_map_no_vm(guc->busy.bo);
> }
>
> +/*
> + * xe_guc_engine_busyness_supported - check if engine busyness is
> + * @guc: The GuC object
> + *
> + * Engine busyness is supported only above guc 70.11.1
> + *
> + * Returns true if supported, false otherwise
> + */
> +bool xe_guc_engine_busyness_supported(struct xe_guc *guc)
> +{
> + struct xe_uc_fw *uc_fw = &guc->fw;
> + struct xe_uc_fw_version *version = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
Why not XE_UC_FW_VER_RELEASE here? Or should we check firmware type
(compatibility or release) first and then derive version from it.
Regards,
Badal
> +
> + if (GUC_VER(version->major, version->minor, version->patch) >= GUC_VER(1, 3, 1))
> + return true;
> +
> + drm_WARN_ON_ONCE(&guc_to_xe(guc)->drm,
> + "Engine busyness supported from 70.11.1 GuC version\n");
> +
> + return false;
> +}
> +
> /*
> * xe_guc_engine_busyness_active_ticks - Gets the total active ticks
> * @guc: The GuC object
> @@ -133,6 +161,9 @@ u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc)
> {
> u64 ticks_gt;
>
> + if (!xe_guc_engine_busyness_supported(guc))
> + return 0;
> +
> guc_engine_busyness_get_usage(guc, NULL, NULL, &ticks_gt);
>
> return ticks_gt;
> @@ -150,6 +181,9 @@ u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
> {
> u64 ticks_engine;
>
> + if (!xe_guc_engine_busyness_supported(guc))
> + return 0;
> +
> guc_engine_busyness_get_usage(guc, hwe, &ticks_engine, NULL);
>
> return ticks_engine;
> @@ -173,6 +207,9 @@ int xe_guc_engine_busyness_init(struct xe_guc *guc)
> u32 size;
> int err;
>
> + if (!xe_guc_engine_busyness_supported(guc))
> + return 0;
> +
> /* Initialization already done */
> if (guc->busy.bo)
> return 0;
> diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
> index 57325910ebc4..e3c74e0236af 100644
> --- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
> +++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
> @@ -14,5 +14,5 @@ struct xe_guc;
> int xe_guc_engine_busyness_init(struct xe_guc *guc);
> u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc);
> u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
> -
> +bool xe_guc_engine_busyness_supported(struct xe_guc *guc);
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
> index c2be157a6f5d..f91652886b67 100644
> --- a/drivers/gpu/drm/xe/xe_pmu.c
> +++ b/drivers/gpu/drm/xe/xe_pmu.c
> @@ -55,14 +55,16 @@ static int
> config_status(struct xe_device *xe, u64 config)
> {
> unsigned int gt_id = config_gt_id(config);
> + struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
>
> if (gt_id >= XE_PMU_MAX_GT)
> return -ENOENT;
>
> - if (config_counter(config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0))
> - return 0;
> + if (config_counter(config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0) &&
> + !xe_gt_engine_busyness_supported(gt))
> + return -ENOENT;
>
> - return -ENOENT;
> + return 0;
> }
>
> static int engine_event_status(struct xe_hw_engine *hwe,
> @@ -71,6 +73,10 @@ static int engine_event_status(struct xe_hw_engine *hwe,
> if (!hwe)
> return -ENODEV;
>
> + if (sample == DRM_XE_PMU_SAMPLE_BUSY_TICKS &&
> + !xe_gt_engine_busyness_supported(hwe->gt))
> + return -ENOENT;
> +
> /* Other engine events will be added, XE_ENGINE_SAMPLE_COUNT will be changed */
> return (sample >= DRM_XE_PMU_SAMPLE_BUSY_TICKS && sample < XE_ENGINE_SAMPLE_COUNT)
> ? 0 : -ENOENT;
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCH v4 6/8] drm/xe/guc: Expose engine busyness only for supported GuC version
2024-01-18 6:13 ` Nilawar, Badal
@ 2024-01-19 10:13 ` Riana Tauro
2024-01-19 12:18 ` Nilawar, Badal
0 siblings, 1 reply; 23+ messages in thread
From: Riana Tauro @ 2024-01-19 10:13 UTC (permalink / raw)
To: Nilawar, Badal, intel-xe
On 1/18/2024 11:43 AM, Nilawar, Badal wrote:
> Hi Riana,
>
> On 22-12-2023 13:16, Riana Tauro wrote:
>> Guc version numbers are 8 bits only so convert to 32 bit 8.8.8
>> to allow version comparisons. Use compatibility version
>> for the same.
>>
>> Engine busyness is supported only on GuC versions >= 70.11.1.
>> Allow enabling/reading engine busyness only on supported
>> GuC versions. Warn once if not supported.
>>
>> v2: rebase
>> fix guc comparison error (Matthew Brost)
>> add a macro for guc version comparison
>>
>> v3: do not show pmu counters if guc engine busyness
>> is not supported
>>
>> v4: add version check comment only in the check function
>> remove it otherwise (Umesh)
>>
>> Signed-off-by: Riana Tauro <riana.tauro@intel.com>
>> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_gt.c | 11 ++++++
>> drivers/gpu/drm/xe/xe_gt.h | 1 +
>> drivers/gpu/drm/xe/xe_guc_engine_busyness.c | 37 +++++++++++++++++++++
>> drivers/gpu/drm/xe/xe_guc_engine_busyness.h | 2 +-
>> drivers/gpu/drm/xe/xe_pmu.c | 12 +++++--
>> 5 files changed, 59 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
>> index 5825471a3422..a48cceaa7750 100644
>> --- a/drivers/gpu/drm/xe/xe_gt.c
>> +++ b/drivers/gpu/drm/xe/xe_gt.c
>> @@ -800,3 +800,14 @@ u64 xe_gt_total_active_ticks(struct xe_gt *gt)
>> {
>> return xe_guc_engine_busyness_active_ticks(&gt->uc.guc);
>> }
>> +
>> +/**
>> + * xe_gt_engine_busyness_supported - Checks support for engine busyness
>> + * @gt: GT structure
>> + *
>> + * Returns true if engine busyness is supported, false otherwise.
>> + */
>> +bool xe_gt_engine_busyness_supported(struct xe_gt *gt)
>> +{
>> + return xe_guc_engine_busyness_supported(&gt->uc.guc);
>> +}
>> diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
>> index 9bac85cdf609..bef99eb2fed2 100644
>> --- a/drivers/gpu/drm/xe/xe_gt.h
>> +++ b/drivers/gpu/drm/xe/xe_gt.h
>> @@ -42,6 +42,7 @@ int xe_gt_resume(struct xe_gt *gt);
>> void xe_gt_reset_async(struct xe_gt *gt);
>> void xe_gt_sanitize(struct xe_gt *gt);
>> +bool xe_gt_engine_busyness_supported(struct xe_gt *gt);
>> u64 xe_gt_engine_busy_ticks(struct xe_gt *gt, struct xe_hw_engine
>> *hwe);
>> u64 xe_gt_total_active_ticks(struct xe_gt *gt);
>> /**
>> diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
>> b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
>> index 24e72555647a..2dd06563d0ad 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
>> @@ -32,6 +32,9 @@
>> * engine busyness % = (ticks_engine / ticks_gt) * 100
>> */
>> +/* GuC version number components are only 8-bit, so converting to a
>> 32bit 8.8.8 */
>> +#define GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))
>> +
>> static void guc_engine_busyness_usage_map(struct xe_guc *guc,
>> struct xe_hw_engine *hwe,
>> struct iosys_map *engine_map,
>> @@ -110,6 +113,9 @@ static void
>> guc_engine_busyness_enable_stats(struct xe_guc *guc)
>> struct xe_device *xe = guc_to_xe(guc);
>> int ret;
>> + if (!xe_guc_engine_busyness_supported(guc))
>> + return;
>> +
>> ret = xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
>> if (ret)
>> drm_err(&xe->drm, "Failed to enable usage stats %pe",
>> ERR_PTR(ret));
>> @@ -122,6 +128,28 @@ static void guc_engine_busyness_fini(struct
>> drm_device *drm, void *arg)
>> xe_bo_unpin_map_no_vm(guc->busy.bo);
>> }
>> +/*
>> + * xe_guc_engine_busyness_supported - check if engine busyness is
>> supported
>> + * @guc: The GuC object
>> + *
>> + * Engine busyness is supported only above guc 70.11.1
>> + *
>> + * Returns true if supported, false otherwise
>> + */
>> +bool xe_guc_engine_busyness_supported(struct xe_guc *guc)
>> +{
>> + struct xe_uc_fw *uc_fw = &guc->fw;
>> + struct xe_uc_fw_version *version =
>> &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
> Why not XE_UC_FW_VER_RELEASE here? Or should we check firmware type
> (compatibility or release) first and then derive version from it.
Hi Badal
Even Release can be used. Used submission version (VF compatibility
version).
Why do we have to check firmware type ?
However, This patch is currently on hold.
Thanks
Riana
>
> Regards,
> Badal
>> +
>> + if (GUC_VER(version->major, version->minor, version->patch) >=
>> GUC_VER(1, 3, 1))
>> + return true;
>> +
>> + drm_WARN_ON_ONCE(&guc_to_xe(guc)->drm,
>> + "Engine busyness supported from 70.11.1 GuC version\n");
>> +
>> + return false;
>> +}
>> +
>> /*
>> * xe_guc_engine_busyness_active_ticks - Gets the total active ticks
>> * @guc: The GuC object
>> @@ -133,6 +161,9 @@ u64 xe_guc_engine_busyness_active_ticks(struct
>> xe_guc *guc)
>> {
>> u64 ticks_gt;
>> + if (!xe_guc_engine_busyness_supported(guc))
>> + return 0;
>> +
>> guc_engine_busyness_get_usage(guc, NULL, NULL, &ticks_gt);
>> return ticks_gt;
>> @@ -150,6 +181,9 @@ u64 xe_guc_engine_busyness_ticks(struct xe_guc
>> *guc, struct xe_hw_engine *hwe)
>> {
>> u64 ticks_engine;
>> + if (!xe_guc_engine_busyness_supported(guc))
>> + return 0;
>> +
>> guc_engine_busyness_get_usage(guc, hwe, &ticks_engine, NULL);
>> return ticks_engine;
>> @@ -173,6 +207,9 @@ int xe_guc_engine_busyness_init(struct xe_guc *guc)
>> u32 size;
>> int err;
>> + if (!xe_guc_engine_busyness_supported(guc))
>> + return 0;
>> +
>> /* Initialization already done */
>> if (guc->busy.bo)
>> return 0;
>> diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
>> b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
>> index 57325910ebc4..e3c74e0236af 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
>> @@ -14,5 +14,5 @@ struct xe_guc;
>> int xe_guc_engine_busyness_init(struct xe_guc *guc);
>> u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc);
>> u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct
>> xe_hw_engine *hwe);
>> -
>> +bool xe_guc_engine_busyness_supported(struct xe_guc *guc);
>> #endif
>> diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
>> index c2be157a6f5d..f91652886b67 100644
>> --- a/drivers/gpu/drm/xe/xe_pmu.c
>> +++ b/drivers/gpu/drm/xe/xe_pmu.c
>> @@ -55,14 +55,16 @@ static int
>> config_status(struct xe_device *xe, u64 config)
>> {
>> unsigned int gt_id = config_gt_id(config);
>> + struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
>> if (gt_id >= XE_PMU_MAX_GT)
>> return -ENOENT;
>> - if (config_counter(config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0))
>> - return 0;
>> + if (config_counter(config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0) &&
>> + !xe_gt_engine_busyness_supported(gt))
>> + return -ENOENT;
>> - return -ENOENT;
>> + return 0;
>> }
>> static int engine_event_status(struct xe_hw_engine *hwe,
>> @@ -71,6 +73,10 @@ static int engine_event_status(struct xe_hw_engine
>> *hwe,
>> if (!hwe)
>> return -ENODEV;
>> + if (sample == DRM_XE_PMU_SAMPLE_BUSY_TICKS &&
>> + !xe_gt_engine_busyness_supported(hwe->gt))
>> + return -ENOENT;
>> +
>> /* Other engine events will be added, XE_ENGINE_SAMPLE_COUNT
>> will be changed */
>> return (sample >= DRM_XE_PMU_SAMPLE_BUSY_TICKS && sample <
>> XE_ENGINE_SAMPLE_COUNT)
>> ? 0 : -ENOENT;
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCH v4 6/8] drm/xe/guc: Expose engine busyness only for supported GuC version
2024-01-19 10:13 ` Riana Tauro
@ 2024-01-19 12:18 ` Nilawar, Badal
0 siblings, 0 replies; 23+ messages in thread
From: Nilawar, Badal @ 2024-01-19 12:18 UTC (permalink / raw)
To: Riana Tauro, intel-xe
On 19-01-2024 15:43, Riana Tauro wrote:
>
>
> On 1/18/2024 11:43 AM, Nilawar, Badal wrote:
>> Hi Riana,
>>
>> On 22-12-2023 13:16, Riana Tauro wrote:
>>> Guc version numbers are 8 bits only so convert to 32 bit 8.8.8
>>> to allow version comparisons. Use compatibility version
>>> for the same.
>>>
>>> Engine busyness is supported only on GuC versions >= 70.11.1.
>>> Allow enabling/reading engine busyness only on supported
>>> GuC versions. Warn once if not supported.
>>>
>>> v2: rebase
>>> fix guc comparison error (Matthew Brost)
>>> add a macro for guc version comparison
>>>
>>> v3: do not show pmu counters if guc engine busyness
>>> is not supported
>>>
>>> v4: add version check comment only in the check function
>>> remove it otherwise (Umesh)
>>>
>>> Signed-off-by: Riana Tauro <riana.tauro@intel.com>
>>> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>> ---
>>> drivers/gpu/drm/xe/xe_gt.c | 11 ++++++
>>> drivers/gpu/drm/xe/xe_gt.h | 1 +
>>> drivers/gpu/drm/xe/xe_guc_engine_busyness.c | 37 +++++++++++++++++++++
>>> drivers/gpu/drm/xe/xe_guc_engine_busyness.h | 2 +-
>>> drivers/gpu/drm/xe/xe_pmu.c | 12 +++++--
>>> 5 files changed, 59 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
>>> index 5825471a3422..a48cceaa7750 100644
>>> --- a/drivers/gpu/drm/xe/xe_gt.c
>>> +++ b/drivers/gpu/drm/xe/xe_gt.c
>>> @@ -800,3 +800,14 @@ u64 xe_gt_total_active_ticks(struct xe_gt *gt)
>>> {
>>> return xe_guc_engine_busyness_active_ticks(&gt->uc.guc);
>>> }
>>> +
>>> +/**
>>> + * xe_gt_engine_busyness_supported - Checks support for engine busyness
>>> + * @gt: GT structure
>>> + *
>>> + * Returns true if engine busyness is supported, false otherwise.
>>> + */
>>> +bool xe_gt_engine_busyness_supported(struct xe_gt *gt)
>>> +{
>>> + return xe_guc_engine_busyness_supported(&gt->uc.guc);
>>> +}
>>> diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
>>> index 9bac85cdf609..bef99eb2fed2 100644
>>> --- a/drivers/gpu/drm/xe/xe_gt.h
>>> +++ b/drivers/gpu/drm/xe/xe_gt.h
>>> @@ -42,6 +42,7 @@ int xe_gt_resume(struct xe_gt *gt);
>>> void xe_gt_reset_async(struct xe_gt *gt);
>>> void xe_gt_sanitize(struct xe_gt *gt);
>>> +bool xe_gt_engine_busyness_supported(struct xe_gt *gt);
>>> u64 xe_gt_engine_busy_ticks(struct xe_gt *gt, struct xe_hw_engine
>>> *hwe);
>>> u64 xe_gt_total_active_ticks(struct xe_gt *gt);
>>> /**
>>> diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
>>> b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
>>> index 24e72555647a..2dd06563d0ad 100644
>>> --- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
>>> +++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
>>> @@ -32,6 +32,9 @@
>>> * engine busyness % = (ticks_engine / ticks_gt) * 100
>>> */
>>> +/* GuC version number components are only 8-bit, so converting to a
>>> 32bit 8.8.8 */
>>> +#define GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) |
>>> (pat))
>>> +
>>> static void guc_engine_busyness_usage_map(struct xe_guc *guc,
>>> struct xe_hw_engine *hwe,
>>> struct iosys_map *engine_map,
>>> @@ -110,6 +113,9 @@ static void
>>> guc_engine_busyness_enable_stats(struct xe_guc *guc)
>>> struct xe_device *xe = guc_to_xe(guc);
>>> int ret;
>>> + if (!xe_guc_engine_busyness_supported(guc))
>>> + return;
>>> +
>>> ret = xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
>>> if (ret)
>>> drm_err(&xe->drm, "Failed to enable usage stats %pe",
>>> ERR_PTR(ret));
>>> @@ -122,6 +128,28 @@ static void guc_engine_busyness_fini(struct
>>> drm_device *drm, void *arg)
>>> xe_bo_unpin_map_no_vm(guc->busy.bo);
>>> }
>>> +/*
>>> + * xe_guc_engine_busyness_supported - check if engine busyness is
>>> supported
>>> + * @guc: The GuC object
>>> + *
>>> + * Engine busyness is supported only above guc 70.11.1
>>> + *
>>> + * Returns true if supported, false otherwise
>>> + */
>>> +bool xe_guc_engine_busyness_supported(struct xe_guc *guc)
>>> +{
>>> + struct xe_uc_fw *uc_fw = &guc->fw;
>>> + struct xe_uc_fw_version *version =
>>> &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
>> Why not XE_UC_FW_VER_RELEASE here? Or should we check firmware type
>> (compatibility or release) first and then derive version from it.
> Hi Badal
>
> Even Release can be used. Used submission version (VF compatibility
> version).
Ok
> Why do we have to check firmware type ?
Sorry, I was confused in release vs compatibility.
>
> However, This patch is currently on hold.
Ok
Thanks,
Badal
>
> Thanks
> Riana
>>
>> Regards,
>> Badal
>>> +
>>> + if (GUC_VER(version->major, version->minor, version->patch) >=
>>> GUC_VER(1, 3, 1))
>>> + return true;
>>> +
>>> + drm_WARN_ON_ONCE(&guc_to_xe(guc)->drm,
>>> + "Engine busyness supported from 70.11.1 GuC version\n");
>>> +
>>> + return false;
>>> +}
>>> +
>>> /*
>>> * xe_guc_engine_busyness_active_ticks - Gets the total active ticks
>>> * @guc: The GuC object
>>> @@ -133,6 +161,9 @@ u64 xe_guc_engine_busyness_active_ticks(struct
>>> xe_guc *guc)
>>> {
>>> u64 ticks_gt;
>>> + if (!xe_guc_engine_busyness_supported(guc))
>>> + return 0;
>>> +
>>> guc_engine_busyness_get_usage(guc, NULL, NULL, &ticks_gt);
>>> return ticks_gt;
>>> @@ -150,6 +181,9 @@ u64 xe_guc_engine_busyness_ticks(struct xe_guc
>>> *guc, struct xe_hw_engine *hwe)
>>> {
>>> u64 ticks_engine;
>>> + if (!xe_guc_engine_busyness_supported(guc))
>>> + return 0;
>>> +
>>> guc_engine_busyness_get_usage(guc, hwe, &ticks_engine, NULL);
>>> return ticks_engine;
>>> @@ -173,6 +207,9 @@ int xe_guc_engine_busyness_init(struct xe_guc *guc)
>>> u32 size;
>>> int err;
>>> + if (!xe_guc_engine_busyness_supported(guc))
>>> + return 0;
>>> +
>>> /* Initialization already done */
>>> if (guc->busy.bo)
>>> return 0;
>>> diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
>>> b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
>>> index 57325910ebc4..e3c74e0236af 100644
>>> --- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
>>> +++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
>>> @@ -14,5 +14,5 @@ struct xe_guc;
>>> int xe_guc_engine_busyness_init(struct xe_guc *guc);
>>> u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc);
>>> u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct
>>> xe_hw_engine *hwe);
>>> -
>>> +bool xe_guc_engine_busyness_supported(struct xe_guc *guc);
>>> #endif
>>> diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
>>> index c2be157a6f5d..f91652886b67 100644
>>> --- a/drivers/gpu/drm/xe/xe_pmu.c
>>> +++ b/drivers/gpu/drm/xe/xe_pmu.c
>>> @@ -55,14 +55,16 @@ static int
>>> config_status(struct xe_device *xe, u64 config)
>>> {
>>> unsigned int gt_id = config_gt_id(config);
>>> + struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
>>> if (gt_id >= XE_PMU_MAX_GT)
>>> return -ENOENT;
>>> - if (config_counter(config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0))
>>> - return 0;
>>> + if (config_counter(config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0) &&
>>> + !xe_gt_engine_busyness_supported(gt))
>>> + return -ENOENT;
>>> - return -ENOENT;
>>> + return 0;
>>> }
>>> static int engine_event_status(struct xe_hw_engine *hwe,
>>> @@ -71,6 +73,10 @@ static int engine_event_status(struct xe_hw_engine
>>> *hwe,
>>> if (!hwe)
>>> return -ENODEV;
>>> + if (sample == DRM_XE_PMU_SAMPLE_BUSY_TICKS &&
>>> + !xe_gt_engine_busyness_supported(hwe->gt))
>>> + return -ENOENT;
>>> +
>>> /* Other engine events will be added, XE_ENGINE_SAMPLE_COUNT
>>> will be changed */
>>> return (sample >= DRM_XE_PMU_SAMPLE_BUSY_TICKS && sample <
>>> XE_ENGINE_SAMPLE_COUNT)
>>> ? 0 : -ENOENT;
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v4 7/8] drm/xe/guc: Dynamically enable/disable engine busyness stats
2023-12-22 7:45 [PATCH v4 0/8] Engine busyness Riana Tauro
` (5 preceding siblings ...)
2023-12-22 7:46 ` [PATCH v4 6/8] drm/xe/guc: Expose engine busyness only for supported GuC version Riana Tauro
@ 2023-12-22 7:46 ` Riana Tauro
2023-12-22 7:46 ` [PATCH v4 8/8] drm/xe/guc: Handle runtime suspend issues for engine busyness Riana Tauro
7 siblings, 0 replies; 23+ messages in thread
From: Riana Tauro @ 2023-12-22 7:46 UTC (permalink / raw)
To: intel-xe
Dynamically enable/disable engine busyness stats using GuC
action when PMU interface is opened and closed to avoid
power penalty.
Co-developed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Riana Tauro <riana.tauro@intel.com>
---
drivers/gpu/drm/xe/xe_guc_engine_busyness.c | 96 ++++++++++++++++++++-
drivers/gpu/drm/xe/xe_guc_engine_busyness.h | 2 +
drivers/gpu/drm/xe/xe_guc_types.h | 14 +++
drivers/gpu/drm/xe/xe_pmu.c | 32 +++++++
4 files changed, 140 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
index 2dd06563d0ad..79ae06b71943 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
@@ -8,6 +8,7 @@
#include "abi/guc_actions_abi.h"
#include "xe_bo.h"
+#include "xe_device.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
@@ -102,9 +103,9 @@ static void guc_engine_busyness_get_usage(struct xe_guc *guc,
*ticks_gt = gt_ticks;
}
-static void guc_engine_busyness_enable_stats(struct xe_guc *guc)
+static void guc_engine_busyness_action_usage_stats(struct xe_guc *guc, bool enable)
{
- u32 ggtt_addr = xe_bo_ggtt_addr(guc->busy.bo);
+ u32 ggtt_addr = enable ? xe_bo_ggtt_addr(guc->busy.bo) : 0;
u32 action[] = {
XE_GUC_ACTION_SET_DEVICE_ENGINE_UTILIZATION,
ggtt_addr,
@@ -121,6 +122,45 @@ static void guc_engine_busyness_enable_stats(struct xe_guc *guc)
drm_err(&xe->drm, "Failed to enable usage stats %pe", ERR_PTR(ret));
}
+static void guc_engine_busyness_enable_stats(struct xe_guc *guc, bool enable)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ bool skip;
+
+ spin_lock(&guc->busy.enable_lock);
+ skip = enable == guc->busy.enabled;
+ if (!skip)
+ guc->busy.enabled = enable;
+ spin_unlock(&guc->busy.enable_lock);
+
+ if (skip)
+ return;
+
+ xe_device_mem_access_get(xe);
+ guc_engine_busyness_action_usage_stats(guc, enable);
+ xe_device_mem_access_put(xe);
+}
+
+static void guc_engine_busyness_toggle_stats(struct xe_guc *guc)
+{
+ if (!guc->submission_state.enabled)
+ return;
+
+ /* Pmu_ref can increase before the worker thread runs this function */
+ if (guc->busy.pmu_ref >= 1)
+ guc_engine_busyness_enable_stats(guc, true);
+ else if (guc->busy.pmu_ref == 0)
+ guc_engine_busyness_enable_stats(guc, false);
+}
+
+static void guc_engine_buysness_worker_func(struct work_struct *w)
+{
+ struct xe_guc *guc = container_of(w, struct xe_guc,
+ busy.enable_worker);
+
+ guc_engine_busyness_toggle_stats(guc);
+}
+
static void guc_engine_busyness_fini(struct drm_device *drm, void *arg)
{
struct xe_guc *guc = arg;
@@ -150,6 +190,52 @@ bool xe_guc_engine_busyness_supported(struct xe_guc *guc)
return false;
}
+/*
+ * xe_guc_engine_busyness_pin - Dynamically enables engine busyness stats
+ * @guc: The GuC object
+ * @pmu_locked: boolean to indicate pmu event is started, locked by pmu spinlock
+ *
+ * Dynamically enables engine busyness by queueing a worker thread
+ * if guc submission is not yet enabled or if pmu event is started.
+ */
+void xe_guc_engine_busyness_pin(struct xe_guc *guc, bool pmu_locked)
+{
+ /* Engine busyness supported only on GuC >= 70.11.1 */
+ if (!xe_guc_engine_busyness_supported(guc))
+ return;
+
+ if (pmu_locked)
+ guc->busy.pmu_ref++;
+
+ if (!guc->submission_state.enabled || pmu_locked)
+ queue_work(system_unbound_wq, &guc->busy.enable_worker);
+ else
+ guc_engine_busyness_enable_stats(guc, true);
+}
+
+/*
+ * xe_guc_engine_busyness_unpin - Dynamically disables engine busyness stats
+ * @guc: The GuC object
+ * @pmu_locked: boolean to indicate pmu event is stopped, locked by pmu spinlock
+ *
+ * Dynamically disables engine busyness by queueing a worker thread
+ * if guc submission is not yet enabled or if pmu event is stopped.
+ */
+void xe_guc_engine_busyness_unpin(struct xe_guc *guc, bool pmu_locked)
+{
+ /* Engine busyness supported only on GuC >= 70.11.1 */
+ if (!xe_guc_engine_busyness_supported(guc))
+ return;
+
+ if (pmu_locked)
+ guc->busy.pmu_ref--;
+
+ if (!guc->submission_state.enabled || pmu_locked)
+ queue_work(system_unbound_wq, &guc->busy.enable_worker);
+ else
+ guc_engine_busyness_toggle_stats(guc);
+}
+
/*
* xe_guc_engine_busyness_active_ticks - Gets the total active ticks
* @guc: The GuC object
@@ -224,9 +310,11 @@ int xe_guc_engine_busyness_init(struct xe_guc *guc)
if (IS_ERR(bo))
return PTR_ERR(bo);
+ spin_lock_init(&guc->busy.enable_lock);
+ INIT_WORK(&guc->busy.enable_worker, guc_engine_buysness_worker_func);
guc->busy.bo = bo;
-
- guc_engine_busyness_enable_stats(guc);
+ guc->busy.enabled = false;
+ guc->busy.pmu_ref = 0;
err = drmm_add_action_or_reset(&xe->drm, guc_engine_busyness_fini, guc);
if (err)
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
index e3c74e0236af..008af1c0838a 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
@@ -15,4 +15,6 @@ int xe_guc_engine_busyness_init(struct xe_guc *guc);
u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc);
u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
bool xe_guc_engine_busyness_supported(struct xe_guc *guc);
+void xe_guc_engine_busyness_pin(struct xe_guc *guc, bool pmu_locked);
+void xe_guc_engine_busyness_unpin(struct xe_guc *guc, bool pmu_locked);
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index a75728071f46..1d4123fec9c0 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -89,6 +89,20 @@ struct xe_guc {
struct {
/** @bo: GGTT buffer object of engine busyness that is shared with GuC */
struct xe_bo *bo;
+ /** @enabled: state of engine stats */
+ bool enabled;
+ /** @enable_lock: for accessing @enabled */
+ spinlock_t enable_lock;
+ /**
+ * @enable_worker: Async worker for enabling/disabling
+ * busyness tracking from PMU
+ */
+ struct work_struct enable_worker;
+ /**
+ * @pmu_ref: how many outstanding PMU counters have
+ * been requested, locked by PMU spinlock
+ */
+ int pmu_ref;
} busy;
/**
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index f91652886b67..3161ed157bd2 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -9,6 +9,9 @@
#include "xe_device.h"
#include "xe_gt.h"
+#include "xe_gt_clock.h"
+#include "xe_guc_engine_busyness.h"
+#include "xe_mmio.h"
#define XE_ENGINE_SAMPLE_COUNT (DRM_XE_PMU_SAMPLE_BUSY_TICKS + 1)
@@ -93,6 +96,8 @@ static int engine_event_init(struct perf_event *event)
hwe = xe_gt_hw_engine(gt, xe_hw_engine_from_user_class(engine_event_class(event)),
engine_event_instance(event), true);
+ xe_guc_engine_busyness_pin(&gt->uc.guc, false);
+
return engine_event_status(hwe, engine_event_sample(event));
}
@@ -204,6 +209,19 @@ static void xe_pmu_event_read(struct perf_event *event)
static void xe_pmu_enable(struct perf_event *event)
{
+ struct xe_device *xe =
+ container_of(event->pmu, typeof(*xe), pmu.base);
+ const int gt_id = config_gt_id(event->attr.config);
+ struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+ struct xe_pmu *pmu = &xe->pmu;
+ unsigned long flags;
+
+ if (is_engine_event(event) ||
+ config_counter(event->attr.config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0)) {
+ spin_lock_irqsave(&pmu->lock, flags);
+ xe_guc_engine_busyness_pin(&gt->uc.guc, true);
+ spin_unlock_irqrestore(&pmu->lock, flags);
+ }
/*
* Store the current counter value so we can report the correct delta
* for all listeners. Even when the event was already enabled and has
@@ -227,9 +245,23 @@ static void xe_pmu_event_start(struct perf_event *event, int flags)
static void xe_pmu_event_stop(struct perf_event *event, int flags)
{
+ struct xe_device *xe =
+ container_of(event->pmu, typeof(*xe), pmu.base);
+ const int gt_id = config_gt_id(event->attr.config);
+ struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+ struct xe_pmu *pmu = &xe->pmu;
+ unsigned long irqflags;
+
if (flags & PERF_EF_UPDATE)
xe_pmu_event_read(event);
+ if (is_engine_event(event) ||
+ config_counter(event->attr.config) == DRM_XE_PMU_TOTAL_ACTIVE_TICKS(0)) {
+ spin_lock_irqsave(&pmu->lock, irqflags);
+ xe_guc_engine_busyness_unpin(&gt->uc.guc, true);
+ spin_unlock_irqrestore(&pmu->lock, irqflags);
+ }
+
event->hw.state = PERF_HES_STOPPED;
}
--
2.40.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH v4 8/8] drm/xe/guc: Handle runtime suspend issues for engine busyness
2023-12-22 7:45 [PATCH v4 0/8] Engine busyness Riana Tauro
` (6 preceding siblings ...)
2023-12-22 7:46 ` [PATCH v4 7/8] drm/xe/guc: Dynamically enable/disable engine busyness stats Riana Tauro
@ 2023-12-22 7:46 ` Riana Tauro
7 siblings, 0 replies; 23+ messages in thread
From: Riana Tauro @ 2023-12-22 7:46 UTC (permalink / raw)
To: intel-xe
1) During runtime suspend, when the card enters D3hot, values read
from the shared memory maintained by GuC return 0xFF.
Waking up for every perf read when the
device is runtime suspended causes a power penalty.
Store the last read busy ticks and total active ticks and return
these values when suspended
2) When the device is runtime resumed, guc is loaded again. If pmu
interface was opened to collect busyness events, the guc stats
have to be re-enabled to resume collection after suspend.
Disable/enable guc stats if pmu is opened and is already collecting
busyness events and device gets runtime suspended/resumed.
v2: rebase
fix local variable naming (Umesh)
Signed-off-by: Riana Tauro <riana.tauro@intel.com>
---
drivers/gpu/drm/xe/xe_gt.c | 4 ++
drivers/gpu/drm/xe/xe_guc_engine_busyness.c | 51 ++++++++++++++++++++-
drivers/gpu/drm/xe/xe_guc_engine_busyness.h | 2 +
drivers/gpu/drm/xe/xe_guc_types.h | 5 ++
drivers/gpu/drm/xe/xe_pmu.c | 10 ++++
drivers/gpu/drm/xe/xe_pmu.h | 4 ++
6 files changed, 74 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index a48cceaa7750..6d20bf5f4997 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -696,6 +696,8 @@ int xe_gt_suspend(struct xe_gt *gt)
if (err)
goto err_force_wake;
+ xe_pmu_suspend(gt);
+
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
xe_device_mem_access_put(gt_to_xe(gt));
xe_gt_info(gt, "suspended\n");
@@ -724,6 +726,8 @@ int xe_gt_resume(struct xe_gt *gt)
if (err)
goto err_force_wake;
+ xe_pmu_resume(gt);
+
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
xe_device_mem_access_put(gt_to_xe(gt));
xe_gt_info(gt, "resumed\n");
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
index 79ae06b71943..c86f9ed2440e 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.c
@@ -96,11 +96,15 @@ static void guc_engine_busyness_get_usage(struct xe_guc *guc,
#undef read_engine_usage
#undef read_global_field
- if (ticks_engine)
+ if (hwe && ticks_engine) {
*ticks_engine = engine_ticks;
+ guc->busy.prev_busy_ticks[hwe->class][hwe->logical_instance] = engine_ticks;
+ }
- if (ticks_gt)
+ if (ticks_gt) {
*ticks_gt = gt_ticks;
+ guc->busy.prev_gt_ticks = gt_ticks;
+ }
}
static void guc_engine_busyness_action_usage_stats(struct xe_guc *guc, bool enable)
@@ -236,6 +240,36 @@ void xe_guc_engine_busyness_unpin(struct xe_guc *guc, bool pmu_locked)
guc_engine_busyness_toggle_stats(guc);
}
+/*
+ * xe_guc_engine_busyness_resume - Helper to resume engine busyness
+ * @guc: The GuC object
+ *
+ * Enable engine busyness if there were outstanding pmu events before
+ * suspend and the collection has to be resumed. This is necessary
+ * as there is a common path for both Runtime suspend and system suspend
+ * and it reloads GuC on resume.
+ */
+void xe_guc_engine_busyness_resume(struct xe_guc *guc)
+{
+ if (guc->busy.pmu_ref)
+ guc_engine_busyness_toggle_stats(guc);
+}
+
+/*
+ * xe_guc_engine_busyness_suspend - Helper to suspend engine busyness
+ * @guc: The GuC object
+ *
+ * Disable engine busyness if there are any outstanding pmu events
+ * and if it is suspended. This is necessary as there is a common
+ * path for both Runtime suspend and system suspend
+ * and it reloads GuC on resume.
+ */
+void xe_guc_engine_busyness_suspend(struct xe_guc *guc)
+{
+ if (guc->busy.pmu_ref)
+ guc->busy.enabled = false;
+}
+
/*
* xe_guc_engine_busyness_active_ticks - Gets the total active ticks
* @guc: The GuC object
@@ -245,12 +279,19 @@ void xe_guc_engine_busyness_unpin(struct xe_guc *guc, bool pmu_locked)
*/
u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc)
{
+ struct xe_device *xe = guc_to_xe(guc);
+ bool device_awake;
u64 ticks_gt;
if (!xe_guc_engine_busyness_supported(guc))
return 0;
+ device_awake = xe_device_mem_access_get_if_ongoing(xe);
+ if (!device_awake)
+ return guc->busy.prev_gt_ticks;
+
guc_engine_busyness_get_usage(guc, NULL, NULL, &ticks_gt);
+ xe_device_mem_access_put(xe);
return ticks_gt;
}
@@ -265,12 +306,18 @@ u64 xe_guc_engine_busyness_active_ticks(struct xe_guc *guc)
*/
u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
{
+ struct xe_device *xe = guc_to_xe(guc);
+ bool device_awake;
u64 ticks_engine;
if (!xe_guc_engine_busyness_supported(guc))
return 0;
+ device_awake = xe_device_mem_access_get_if_ongoing(xe);
+ if (!device_awake)
+ return guc->busy.prev_busy_ticks[hwe->class][hwe->logical_instance];
guc_engine_busyness_get_usage(guc, hwe, &ticks_engine, NULL);
+ xe_device_mem_access_put(xe);
return ticks_engine;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
index 008af1c0838a..b33692d77f7d 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
+++ b/drivers/gpu/drm/xe/xe_guc_engine_busyness.h
@@ -17,4 +17,6 @@ u64 xe_guc_engine_busyness_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
bool xe_guc_engine_busyness_supported(struct xe_guc *guc);
void xe_guc_engine_busyness_pin(struct xe_guc *guc, bool pmu_locked);
void xe_guc_engine_busyness_unpin(struct xe_guc *guc, bool pmu_locked);
+void xe_guc_engine_busyness_suspend(struct xe_guc *guc);
+void xe_guc_engine_busyness_resume(struct xe_guc *guc);
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index 1d4123fec9c0..7790bf41371a 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -15,6 +15,7 @@
#include "xe_guc_fwif.h"
#include "xe_guc_log_types.h"
#include "xe_guc_pc_types.h"
+#include "xe_hw_engine.h"
#include "xe_uc_fw_types.h"
/**
@@ -103,6 +104,10 @@ struct xe_guc {
* been requested, locked by PMU spinlock
*/
int pmu_ref;
+ /** @prev_busy_ticks: array containing last stored busy ticks */
+ u64 prev_busy_ticks[XE_ENGINE_CLASS_MAX][XE_HW_ENGINE_MAX_INSTANCE];
+ /** @prev_gt_ticks: last stored gt ticks */
+ u64 prev_gt_ticks;
} busy;
/**
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index 3161ed157bd2..ee505a9afbed 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -548,6 +548,16 @@ static void xe_pmu_unregister_cpuhp_state(struct xe_pmu *pmu)
cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
}
+void xe_pmu_suspend(struct xe_gt *gt)
+{
+ xe_guc_engine_busyness_suspend(&gt->uc.guc);
+}
+
+void xe_pmu_resume(struct xe_gt *gt)
+{
+ xe_guc_engine_busyness_resume(&gt->uc.guc);
+}
+
static void xe_pmu_unregister(struct drm_device *device, void *arg)
{
struct xe_pmu *pmu = arg;
diff --git a/drivers/gpu/drm/xe/xe_pmu.h b/drivers/gpu/drm/xe/xe_pmu.h
index d6fca18466f4..568bcf250934 100644
--- a/drivers/gpu/drm/xe/xe_pmu.h
+++ b/drivers/gpu/drm/xe/xe_pmu.h
@@ -13,10 +13,14 @@
int xe_pmu_init(void);
void xe_pmu_exit(void);
void xe_pmu_register(struct xe_pmu *pmu);
+void xe_pmu_suspend(struct xe_gt *gt);
+void xe_pmu_resume(struct xe_gt *gt);
#else
static inline int xe_pmu_init(void) { return 0; }
static inline void xe_pmu_exit(void) {}
static inline void xe_pmu_register(struct xe_pmu *pmu) {}
+static inline void xe_pmu_suspend(struct xe_gt *gt) {}
+static inline void xe_pmu_resume(struct xe_gt *gt) {}
#endif
#endif
--
2.40.0
^ permalink raw reply related [flat|nested] 23+ messages in thread