* [PATCH v3 1/8] drm/xe/cri: Add new performance limit reasons bits
2025-10-29 23:45 [PATCH v3 0/8] drm/xe: CRI support in gt_throttle + refactors Lucas De Marchi
@ 2025-10-29 23:45 ` Lucas De Marchi
2025-10-29 23:45 ` [PATCH v3 2/8] drm/xe/gt_throttle: Tidy up perf reasons reading Lucas De Marchi
` (6 subsequent siblings)
7 siblings, 0 replies; 17+ messages in thread
From: Lucas De Marchi @ 2025-10-29 23:45 UTC (permalink / raw)
To: intel-xe; +Cc: Lucas De Marchi, Raag Jadav, Sk Anirban
From: Sk Anirban <sk.anirban@intel.com>
Crescent Island has some additional and different bits for performance
limit reasons. Add the new definitions and use them for CRI.
Signed-off-by: Sk Anirban <sk.anirban@intel.com>
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
v3:
- Reword commit message (Raag)
v2:
- Tidy up read_status() and register definition
- s/throttle_cri_specific_attrs/cri_throttle_attrs/
---
drivers/gpu/drm/xe/regs/xe_gt_regs.h | 13 ++
drivers/gpu/drm/xe/xe_gt_throttle.c | 251 ++++++++++++++++++++++++++++++++++-
2 files changed, 261 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index a895a8e801a90..2088256ad3819 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -590,6 +590,7 @@
#define GT_GFX_RC6 XE_REG(0x138108)
#define GT0_PERF_LIMIT_REASONS XE_REG(0x1381a8)
+/* Common performance limit reason bits - available on all platforms */
#define GT0_PERF_LIMIT_REASONS_MASK 0xde3
#define PROCHOT_MASK REG_BIT(0)
#define THERMAL_LIMIT_MASK REG_BIT(1)
@@ -599,6 +600,18 @@
#define POWER_LIMIT_4_MASK REG_BIT(8)
#define POWER_LIMIT_1_MASK REG_BIT(10)
#define POWER_LIMIT_2_MASK REG_BIT(11)
+/* Platform-specific performance limit reason bits - for Crescent Island */
+#define CRI_PERF_LIMIT_REASONS_MASK 0xfdff
+#define SOC_THERMAL_LIMIT_MASK REG_BIT(1)
+#define MEM_THERMAL_MASK REG_BIT(2)
+#define VR_THERMAL_MASK REG_BIT(3)
+#define ICCMAX_MASK REG_BIT(4)
+#define SOC_AVG_THERMAL_MASK REG_BIT(6)
+#define FASTVMODE_MASK REG_BIT(7)
+#define PSYS_PL1_MASK REG_BIT(12)
+#define PSYS_PL2_MASK REG_BIT(13)
+#define P0_FREQ_MASK REG_BIT(14)
+#define PSYS_CRIT_MASK REG_BIT(15)
#define GT_PERF_STATUS XE_REG(0x1381b4)
#define VOLTAGE_MASK REG_GENMASK(10, 0)
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index aa962c783cdf7..a4fe43f4663dd 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -12,6 +12,7 @@
#include "xe_gt_sysfs.h"
#include "xe_gt_throttle.h"
#include "xe_mmio.h"
+#include "xe_platform_types.h"
#include "xe_pm.h"
/**
@@ -28,6 +29,24 @@
* device/gt#/freq0/throttle/reason_ratl - Frequency throttle due to RATL
* device/gt#/freq0/throttle/reason_vr_thermalert - Frequency throttle due to VR THERMALERT
* device/gt#/freq0/throttle/reason_vr_tdc - Frequency throttle due to VR TDC
+ *
+ * The following attributes are available on Crescent Island platform:
+ * device/gt#/freq0/throttle/status - Overall throttle status
+ * device/gt#/freq0/throttle/reason_pl1 - Frequency throttle due to package PL1
+ * device/gt#/freq0/throttle/reason_pl2 - Frequency throttle due to package PL2
+ * device/gt#/freq0/throttle/reason_pl4 - Frequency throttle due to PL4
+ * device/gt#/freq0/throttle/reason_prochot - Frequency throttle due to prochot
+ * device/gt#/freq0/throttle/reason_soc_thermal - Frequency throttle due to SoC thermal
+ * device/gt#/freq0/throttle/reason_mem_thermal - Frequency throttle due to memory thermal
+ * device/gt#/freq0/throttle/reason_vr_thermal - Frequency throttle due to VR thermal
+ * device/gt#/freq0/throttle/reason_iccmax - Frequency throttle due to ICCMAX
+ * device/gt#/freq0/throttle/reason_ratl - Frequency throttle due to RATL thermal algorithm
+ * device/gt#/freq0/throttle/reason_soc_avg_thermal - Frequency throttle due to SoC average temp
+ * device/gt#/freq0/throttle/reason_fastvmode - Frequency throttle due to VR is hitting FastVMode
+ * device/gt#/freq0/throttle/reason_psys_pl1 - Frequency throttle due to PSYS PL1
+ * device/gt#/freq0/throttle/reason_psys_pl2 - Frequency throttle due to PSYS PL2
+ * device/gt#/freq0/throttle/reason_p0_freq - Frequency throttle due to P0 frequency
+ * device/gt#/freq0/throttle/reason_psys_crit - Frequency throttle due to PSYS critical
*/
static struct xe_gt *
@@ -52,9 +71,17 @@ u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt)
static u32 read_status(struct xe_gt *gt)
{
- u32 status = xe_gt_throttle_get_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK;
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 status, mask;
+
+ if (xe->info.platform == XE_CRESCENTISLAND)
+ mask = CRI_PERF_LIMIT_REASONS_MASK;
+ else
+ mask = GT0_PERF_LIMIT_REASONS_MASK;
+ status = xe_gt_throttle_get_limit_reasons(gt) & mask;
xe_gt_dbg(gt, "throttle reasons: 0x%08x\n", status);
+
return status;
}
@@ -86,6 +113,13 @@ static u32 read_reason_thermal(struct xe_gt *gt)
return thermal;
}
+static u32 read_reason_soc_thermal(struct xe_gt *gt)
+{
+ u32 thermal = xe_gt_throttle_get_limit_reasons(gt) & SOC_THERMAL_LIMIT_MASK;
+
+ return thermal;
+}
+
static u32 read_reason_prochot(struct xe_gt *gt)
{
u32 prochot = xe_gt_throttle_get_limit_reasons(gt) & PROCHOT_MASK;
@@ -107,6 +141,13 @@ static u32 read_reason_vr_thermalert(struct xe_gt *gt)
return thermalert;
}
+static u32 read_reason_soc_avg_thermal(struct xe_gt *gt)
+{
+ u32 soc_avg_thermal = xe_gt_throttle_get_limit_reasons(gt) & SOC_AVG_THERMAL_MASK;
+
+ return soc_avg_thermal;
+}
+
static u32 read_reason_vr_tdc(struct xe_gt *gt)
{
u32 tdc = xe_gt_throttle_get_limit_reasons(gt) & VR_TDC_MASK;
@@ -114,6 +155,62 @@ static u32 read_reason_vr_tdc(struct xe_gt *gt)
return tdc;
}
+static u32 read_reason_fastvmode(struct xe_gt *gt)
+{
+ u32 fastvmode = xe_gt_throttle_get_limit_reasons(gt) & FASTVMODE_MASK;
+
+ return fastvmode;
+}
+
+static u32 read_reason_mem_thermal(struct xe_gt *gt)
+{
+ u32 mem_thermal = xe_gt_throttle_get_limit_reasons(gt) & MEM_THERMAL_MASK;
+
+ return mem_thermal;
+}
+
+static u32 read_reason_vr_thermal(struct xe_gt *gt)
+{
+ u32 vr_thermal = xe_gt_throttle_get_limit_reasons(gt) & VR_THERMAL_MASK;
+
+ return vr_thermal;
+}
+
+static u32 read_reason_iccmax(struct xe_gt *gt)
+{
+ u32 iccmax = xe_gt_throttle_get_limit_reasons(gt) & ICCMAX_MASK;
+
+ return iccmax;
+}
+
+static u32 read_reason_psys_pl1(struct xe_gt *gt)
+{
+ u32 psys_pl1 = xe_gt_throttle_get_limit_reasons(gt) & PSYS_PL1_MASK;
+
+ return psys_pl1;
+}
+
+static u32 read_reason_psys_pl2(struct xe_gt *gt)
+{
+ u32 psys_pl2 = xe_gt_throttle_get_limit_reasons(gt) & PSYS_PL2_MASK;
+
+ return psys_pl2;
+}
+
+static u32 read_reason_p0_freq(struct xe_gt *gt)
+{
+ u32 p0_freq = xe_gt_throttle_get_limit_reasons(gt) & P0_FREQ_MASK;
+
+ return p0_freq;
+}
+
+static u32 read_reason_psys_crit(struct xe_gt *gt)
+{
+ u32 psys_crit = xe_gt_throttle_get_limit_reasons(gt) & PSYS_CRIT_MASK;
+
+ return psys_crit;
+}
+
static ssize_t status_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
@@ -169,6 +266,17 @@ static ssize_t reason_thermal_show(struct kobject *kobj,
}
static struct kobj_attribute attr_reason_thermal = __ATTR_RO(reason_thermal);
+static ssize_t reason_soc_thermal_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct xe_gt *gt = dev_to_gt(dev);
+ bool thermal = !!read_reason_soc_thermal(gt);
+
+ return sysfs_emit(buff, "%u\n", thermal);
+}
+static struct kobj_attribute attr_reason_soc_thermal = __ATTR_RO(reason_soc_thermal);
+
static ssize_t reason_prochot_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
@@ -202,6 +310,17 @@ static ssize_t reason_vr_thermalert_show(struct kobject *kobj,
}
static struct kobj_attribute attr_reason_vr_thermalert = __ATTR_RO(reason_vr_thermalert);
+static ssize_t reason_soc_avg_thermal_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct xe_gt *gt = dev_to_gt(dev);
+ bool avg_thermalert = !!read_reason_soc_avg_thermal(gt);
+
+ return sysfs_emit(buff, "%u\n", avg_thermalert);
+}
+static struct kobj_attribute attr_reason_soc_avg_thermal = __ATTR_RO(reason_soc_avg_thermal);
+
static ssize_t reason_vr_tdc_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
@@ -213,6 +332,94 @@ static ssize_t reason_vr_tdc_show(struct kobject *kobj,
}
static struct kobj_attribute attr_reason_vr_tdc = __ATTR_RO(reason_vr_tdc);
+static ssize_t reason_fastvmode_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct xe_gt *gt = dev_to_gt(dev);
+ bool fastvmode = !!read_reason_fastvmode(gt);
+
+ return sysfs_emit(buff, "%u\n", fastvmode);
+}
+static struct kobj_attribute attr_reason_fastvmode = __ATTR_RO(reason_fastvmode);
+
+static ssize_t reason_mem_thermal_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct xe_gt *gt = dev_to_gt(dev);
+ bool mem_thermal = !!read_reason_mem_thermal(gt);
+
+ return sysfs_emit(buff, "%u\n", mem_thermal);
+}
+static struct kobj_attribute attr_reason_mem_thermal = __ATTR_RO(reason_mem_thermal);
+
+static ssize_t reason_vr_thermal_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct xe_gt *gt = dev_to_gt(dev);
+ bool vr_thermal = !!read_reason_vr_thermal(gt);
+
+ return sysfs_emit(buff, "%u\n", vr_thermal);
+}
+static struct kobj_attribute attr_reason_vr_thermal = __ATTR_RO(reason_vr_thermal);
+
+static ssize_t reason_iccmax_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct xe_gt *gt = dev_to_gt(dev);
+ bool iccmax = !!read_reason_iccmax(gt);
+
+ return sysfs_emit(buff, "%u\n", iccmax);
+}
+static struct kobj_attribute attr_reason_iccmax = __ATTR_RO(reason_iccmax);
+
+static ssize_t reason_psys_pl1_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct xe_gt *gt = dev_to_gt(dev);
+ bool psys_pl1 = !!read_reason_psys_pl1(gt);
+
+ return sysfs_emit(buff, "%u\n", psys_pl1);
+}
+static struct kobj_attribute attr_reason_psys_pl1 = __ATTR_RO(reason_psys_pl1);
+
+static ssize_t reason_psys_pl2_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct xe_gt *gt = dev_to_gt(dev);
+ bool psys_pl2 = !!read_reason_psys_pl2(gt);
+
+ return sysfs_emit(buff, "%u\n", psys_pl2);
+}
+static struct kobj_attribute attr_reason_psys_pl2 = __ATTR_RO(reason_psys_pl2);
+
+static ssize_t reason_p0_freq_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct xe_gt *gt = dev_to_gt(dev);
+ bool p0_freq = !!read_reason_p0_freq(gt);
+
+ return sysfs_emit(buff, "%u\n", p0_freq);
+}
+static struct kobj_attribute attr_reason_p0_freq = __ATTR_RO(reason_p0_freq);
+
+static ssize_t reason_psys_crit_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct xe_gt *gt = dev_to_gt(dev);
+ bool psys_crit = !!read_reason_psys_crit(gt);
+
+ return sysfs_emit(buff, "%u\n", psys_crit);
+}
+static struct kobj_attribute attr_reason_psys_crit = __ATTR_RO(reason_psys_crit);
+
static struct attribute *throttle_attrs[] = {
&attr_status.attr,
&attr_reason_pl1.attr,
@@ -226,24 +433,62 @@ static struct attribute *throttle_attrs[] = {
NULL
};
+static struct attribute *cri_throttle_attrs[] = {
+ &attr_status.attr,
+ &attr_reason_prochot.attr,
+ &attr_reason_soc_thermal.attr,
+ &attr_reason_mem_thermal.attr,
+ &attr_reason_vr_thermal.attr,
+ &attr_reason_iccmax.attr,
+ &attr_reason_ratl.attr,
+ &attr_reason_soc_avg_thermal.attr,
+ &attr_reason_fastvmode.attr,
+ &attr_reason_pl4.attr,
+ &attr_reason_pl1.attr,
+ &attr_reason_pl2.attr,
+ &attr_reason_psys_pl1.attr,
+ &attr_reason_psys_pl2.attr,
+ &attr_reason_p0_freq.attr,
+ &attr_reason_psys_crit.attr,
+ NULL
+};
+
static const struct attribute_group throttle_group_attrs = {
.name = "throttle",
.attrs = throttle_attrs,
};
+static const struct attribute_group cri_throttle_group_attrs = {
+ .name = "throttle",
+ .attrs = cri_throttle_attrs,
+};
+
+static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe)
+{
+ switch (xe->info.platform) {
+ case XE_CRESCENTISLAND:
+ return &cri_throttle_group_attrs;
+ default:
+ return &throttle_group_attrs;
+ }
+}
+
static void gt_throttle_sysfs_fini(void *arg)
{
struct xe_gt *gt = arg;
+ struct xe_device *xe = gt_to_xe(gt);
+ const struct attribute_group *group = get_platform_throttle_group(xe);
- sysfs_remove_group(gt->freq, &throttle_group_attrs);
+ sysfs_remove_group(gt->freq, group);
}
int xe_gt_throttle_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
+ const struct attribute_group *group = get_platform_throttle_group(xe);
int err;
- err = sysfs_create_group(gt->freq, &throttle_group_attrs);
+ err = sysfs_create_group(gt->freq, group);
if (err)
return err;
--
2.51.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v3 2/8] drm/xe/gt_throttle: Tidy up perf reasons reading
2025-10-29 23:45 [PATCH v3 0/8] drm/xe: CRI support in gt_throttle + refactors Lucas De Marchi
2025-10-29 23:45 ` [PATCH v3 1/8] drm/xe/cri: Add new performance limit reasons bits Lucas De Marchi
@ 2025-10-29 23:45 ` Lucas De Marchi
2025-10-29 23:45 ` [PATCH v3 3/8] drm/xe/gt_throttle: Always read and mask Lucas De Marchi
` (5 subsequent siblings)
7 siblings, 0 replies; 17+ messages in thread
From: Lucas De Marchi @ 2025-10-29 23:45 UTC (permalink / raw)
To: intel-xe; +Cc: Lucas De Marchi, Raag Jadav
There's no need to be so verbose with two functions per bit:
read_reason_xxxxx() and reason_xxxxx_show(). Drop the former and just
use a new is_throttled_by() that receives the mask as parameter.
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
drivers/gpu/drm/xe/xe_gt_throttle.c | 182 +++++-------------------------------
1 file changed, 21 insertions(+), 161 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index a4fe43f4663dd..c69710f2dbeb5 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -85,130 +85,9 @@ static u32 read_status(struct xe_gt *gt)
return status;
}
-static u32 read_reason_pl1(struct xe_gt *gt)
+static bool is_throttled_by(struct xe_gt *gt, u32 mask)
{
- u32 pl1 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_1_MASK;
-
- return pl1;
-}
-
-static u32 read_reason_pl2(struct xe_gt *gt)
-{
- u32 pl2 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_2_MASK;
-
- return pl2;
-}
-
-static u32 read_reason_pl4(struct xe_gt *gt)
-{
- u32 pl4 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_4_MASK;
-
- return pl4;
-}
-
-static u32 read_reason_thermal(struct xe_gt *gt)
-{
- u32 thermal = xe_gt_throttle_get_limit_reasons(gt) & THERMAL_LIMIT_MASK;
-
- return thermal;
-}
-
-static u32 read_reason_soc_thermal(struct xe_gt *gt)
-{
- u32 thermal = xe_gt_throttle_get_limit_reasons(gt) & SOC_THERMAL_LIMIT_MASK;
-
- return thermal;
-}
-
-static u32 read_reason_prochot(struct xe_gt *gt)
-{
- u32 prochot = xe_gt_throttle_get_limit_reasons(gt) & PROCHOT_MASK;
-
- return prochot;
-}
-
-static u32 read_reason_ratl(struct xe_gt *gt)
-{
- u32 ratl = xe_gt_throttle_get_limit_reasons(gt) & RATL_MASK;
-
- return ratl;
-}
-
-static u32 read_reason_vr_thermalert(struct xe_gt *gt)
-{
- u32 thermalert = xe_gt_throttle_get_limit_reasons(gt) & VR_THERMALERT_MASK;
-
- return thermalert;
-}
-
-static u32 read_reason_soc_avg_thermal(struct xe_gt *gt)
-{
- u32 soc_avg_thermal = xe_gt_throttle_get_limit_reasons(gt) & SOC_AVG_THERMAL_MASK;
-
- return soc_avg_thermal;
-}
-
-static u32 read_reason_vr_tdc(struct xe_gt *gt)
-{
- u32 tdc = xe_gt_throttle_get_limit_reasons(gt) & VR_TDC_MASK;
-
- return tdc;
-}
-
-static u32 read_reason_fastvmode(struct xe_gt *gt)
-{
- u32 fastvmode = xe_gt_throttle_get_limit_reasons(gt) & FASTVMODE_MASK;
-
- return fastvmode;
-}
-
-static u32 read_reason_mem_thermal(struct xe_gt *gt)
-{
- u32 mem_thermal = xe_gt_throttle_get_limit_reasons(gt) & MEM_THERMAL_MASK;
-
- return mem_thermal;
-}
-
-static u32 read_reason_vr_thermal(struct xe_gt *gt)
-{
- u32 vr_thermal = xe_gt_throttle_get_limit_reasons(gt) & VR_THERMAL_MASK;
-
- return vr_thermal;
-}
-
-static u32 read_reason_iccmax(struct xe_gt *gt)
-{
- u32 iccmax = xe_gt_throttle_get_limit_reasons(gt) & ICCMAX_MASK;
-
- return iccmax;
-}
-
-static u32 read_reason_psys_pl1(struct xe_gt *gt)
-{
- u32 psys_pl1 = xe_gt_throttle_get_limit_reasons(gt) & PSYS_PL1_MASK;
-
- return psys_pl1;
-}
-
-static u32 read_reason_psys_pl2(struct xe_gt *gt)
-{
- u32 psys_pl2 = xe_gt_throttle_get_limit_reasons(gt) & PSYS_PL2_MASK;
-
- return psys_pl2;
-}
-
-static u32 read_reason_p0_freq(struct xe_gt *gt)
-{
- u32 p0_freq = xe_gt_throttle_get_limit_reasons(gt) & P0_FREQ_MASK;
-
- return p0_freq;
-}
-
-static u32 read_reason_psys_crit(struct xe_gt *gt)
-{
- u32 psys_crit = xe_gt_throttle_get_limit_reasons(gt) & PSYS_CRIT_MASK;
-
- return psys_crit;
+ return xe_gt_throttle_get_limit_reasons(gt) & mask;
}
static ssize_t status_show(struct kobject *kobj,
@@ -216,9 +95,8 @@ static ssize_t status_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool status = !!read_status(gt);
- return sysfs_emit(buff, "%u\n", status);
+ return sysfs_emit(buff, "%u\n", !!read_status(gt));
}
static struct kobj_attribute attr_status = __ATTR_RO(status);
@@ -227,9 +105,8 @@ static ssize_t reason_pl1_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool pl1 = !!read_reason_pl1(gt);
- return sysfs_emit(buff, "%u\n", pl1);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, POWER_LIMIT_1_MASK));
}
static struct kobj_attribute attr_reason_pl1 = __ATTR_RO(reason_pl1);
@@ -238,9 +115,8 @@ static ssize_t reason_pl2_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool pl2 = !!read_reason_pl2(gt);
- return sysfs_emit(buff, "%u\n", pl2);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, POWER_LIMIT_2_MASK));
}
static struct kobj_attribute attr_reason_pl2 = __ATTR_RO(reason_pl2);
@@ -249,9 +125,8 @@ static ssize_t reason_pl4_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool pl4 = !!read_reason_pl4(gt);
- return sysfs_emit(buff, "%u\n", pl4);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, POWER_LIMIT_4_MASK));
}
static struct kobj_attribute attr_reason_pl4 = __ATTR_RO(reason_pl4);
@@ -260,9 +135,8 @@ static ssize_t reason_thermal_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool thermal = !!read_reason_thermal(gt);
- return sysfs_emit(buff, "%u\n", thermal);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, THERMAL_LIMIT_MASK));
}
static struct kobj_attribute attr_reason_thermal = __ATTR_RO(reason_thermal);
@@ -271,9 +145,8 @@ static ssize_t reason_soc_thermal_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool thermal = !!read_reason_soc_thermal(gt);
- return sysfs_emit(buff, "%u\n", thermal);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, SOC_THERMAL_LIMIT_MASK));
}
static struct kobj_attribute attr_reason_soc_thermal = __ATTR_RO(reason_soc_thermal);
@@ -282,9 +155,8 @@ static ssize_t reason_prochot_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool prochot = !!read_reason_prochot(gt);
- return sysfs_emit(buff, "%u\n", prochot);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PROCHOT_MASK));
}
static struct kobj_attribute attr_reason_prochot = __ATTR_RO(reason_prochot);
@@ -293,9 +165,8 @@ static ssize_t reason_ratl_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool ratl = !!read_reason_ratl(gt);
- return sysfs_emit(buff, "%u\n", ratl);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, RATL_MASK));
}
static struct kobj_attribute attr_reason_ratl = __ATTR_RO(reason_ratl);
@@ -304,9 +175,8 @@ static ssize_t reason_vr_thermalert_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool thermalert = !!read_reason_vr_thermalert(gt);
- return sysfs_emit(buff, "%u\n", thermalert);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, VR_THERMALERT_MASK));
}
static struct kobj_attribute attr_reason_vr_thermalert = __ATTR_RO(reason_vr_thermalert);
@@ -315,9 +185,8 @@ static ssize_t reason_soc_avg_thermal_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool avg_thermalert = !!read_reason_soc_avg_thermal(gt);
- return sysfs_emit(buff, "%u\n", avg_thermalert);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, SOC_AVG_THERMAL_MASK));
}
static struct kobj_attribute attr_reason_soc_avg_thermal = __ATTR_RO(reason_soc_avg_thermal);
@@ -326,9 +195,8 @@ static ssize_t reason_vr_tdc_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool tdc = !!read_reason_vr_tdc(gt);
- return sysfs_emit(buff, "%u\n", tdc);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, VR_TDC_MASK));
}
static struct kobj_attribute attr_reason_vr_tdc = __ATTR_RO(reason_vr_tdc);
@@ -337,9 +205,8 @@ static ssize_t reason_fastvmode_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool fastvmode = !!read_reason_fastvmode(gt);
- return sysfs_emit(buff, "%u\n", fastvmode);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, FASTVMODE_MASK));
}
static struct kobj_attribute attr_reason_fastvmode = __ATTR_RO(reason_fastvmode);
@@ -348,9 +215,8 @@ static ssize_t reason_mem_thermal_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool mem_thermal = !!read_reason_mem_thermal(gt);
- return sysfs_emit(buff, "%u\n", mem_thermal);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, MEM_THERMAL_MASK));
}
static struct kobj_attribute attr_reason_mem_thermal = __ATTR_RO(reason_mem_thermal);
@@ -359,9 +225,8 @@ static ssize_t reason_vr_thermal_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool vr_thermal = !!read_reason_vr_thermal(gt);
- return sysfs_emit(buff, "%u\n", vr_thermal);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, VR_THERMAL_MASK));
}
static struct kobj_attribute attr_reason_vr_thermal = __ATTR_RO(reason_vr_thermal);
@@ -370,9 +235,8 @@ static ssize_t reason_iccmax_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool iccmax = !!read_reason_iccmax(gt);
- return sysfs_emit(buff, "%u\n", iccmax);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, ICCMAX_MASK));
}
static struct kobj_attribute attr_reason_iccmax = __ATTR_RO(reason_iccmax);
@@ -381,9 +245,8 @@ static ssize_t reason_psys_pl1_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool psys_pl1 = !!read_reason_psys_pl1(gt);
- return sysfs_emit(buff, "%u\n", psys_pl1);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PSYS_PL1_MASK));
}
static struct kobj_attribute attr_reason_psys_pl1 = __ATTR_RO(reason_psys_pl1);
@@ -392,9 +255,8 @@ static ssize_t reason_psys_pl2_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool psys_pl2 = !!read_reason_psys_pl2(gt);
- return sysfs_emit(buff, "%u\n", psys_pl2);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PSYS_PL2_MASK));
}
static struct kobj_attribute attr_reason_psys_pl2 = __ATTR_RO(reason_psys_pl2);
@@ -403,9 +265,8 @@ static ssize_t reason_p0_freq_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool p0_freq = !!read_reason_p0_freq(gt);
- return sysfs_emit(buff, "%u\n", p0_freq);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, P0_FREQ_MASK));
}
static struct kobj_attribute attr_reason_p0_freq = __ATTR_RO(reason_p0_freq);
@@ -414,9 +275,8 @@ static ssize_t reason_psys_crit_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- bool psys_crit = !!read_reason_psys_crit(gt);
- return sysfs_emit(buff, "%u\n", psys_crit);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PSYS_CRIT_MASK));
}
static struct kobj_attribute attr_reason_psys_crit = __ATTR_RO(reason_psys_crit);
--
2.51.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v3 3/8] drm/xe/gt_throttle: Always read and mask
2025-10-29 23:45 [PATCH v3 0/8] drm/xe: CRI support in gt_throttle + refactors Lucas De Marchi
2025-10-29 23:45 ` [PATCH v3 1/8] drm/xe/cri: Add new performance limit reasons bits Lucas De Marchi
2025-10-29 23:45 ` [PATCH v3 2/8] drm/xe/gt_throttle: Tidy up perf reasons reading Lucas De Marchi
@ 2025-10-29 23:45 ` Lucas De Marchi
2025-10-29 23:45 ` [PATCH v3 4/8] drm/xe/gt_throttle: Add throttle_to_gt() Lucas De Marchi
` (4 subsequent siblings)
7 siblings, 0 replies; 17+ messages in thread
From: Lucas De Marchi @ 2025-10-29 23:45 UTC (permalink / raw)
To: intel-xe; +Cc: Lucas De Marchi, Raag Jadav
Use a single function to read and mask the value the callers will be
interested in. This reduces the risk of a caller using a plain call to
xe_gt_throttle_get_limit_reasons() without applying any mask, which can
return unexpected bits for future platforms.
Select which reg and mask it's going to be used according to the
platform and gt type and always use that one function.
There was an odd xe_gt_dbg() when reading the status, which is not done
for any other throttle/* sysfs file, so just make the status be as
special as everybody else.
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
drivers/gpu/drm/xe/xe_gt_throttle.c | 28 ++++++++++------------------
1 file changed, 10 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index c69710f2dbeb5..f743444f641a8 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -8,7 +8,6 @@
#include <regs/xe_gt_regs.h>
#include "xe_device.h"
#include "xe_gt.h"
-#include "xe_gt_printk.h"
#include "xe_gt_sysfs.h"
#include "xe_gt_throttle.h"
#include "xe_mmio.h"
@@ -57,32 +56,25 @@ dev_to_gt(struct device *dev)
u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt)
{
- u32 reg;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_reg reg;
+ u32 val, mask;
- xe_pm_runtime_get(gt_to_xe(gt));
if (xe_gt_is_media_type(gt))
- reg = xe_mmio_read32(>->mmio, MTL_MEDIA_PERF_LIMIT_REASONS);
+ reg = MTL_MEDIA_PERF_LIMIT_REASONS;
else
- reg = xe_mmio_read32(>->mmio, GT0_PERF_LIMIT_REASONS);
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return reg;
-}
-
-static u32 read_status(struct xe_gt *gt)
-{
- struct xe_device *xe = gt_to_xe(gt);
- u32 status, mask;
+ reg = GT0_PERF_LIMIT_REASONS;
if (xe->info.platform == XE_CRESCENTISLAND)
mask = CRI_PERF_LIMIT_REASONS_MASK;
else
mask = GT0_PERF_LIMIT_REASONS_MASK;
- status = xe_gt_throttle_get_limit_reasons(gt) & mask;
- xe_gt_dbg(gt, "throttle reasons: 0x%08x\n", status);
+ xe_pm_runtime_get(xe);
+ val = xe_mmio_read32(>->mmio, reg) & mask;
+ xe_pm_runtime_put(xe);
- return status;
+ return val;
}
static bool is_throttled_by(struct xe_gt *gt, u32 mask)
@@ -96,7 +88,7 @@ static ssize_t status_show(struct kobject *kobj,
struct device *dev = kobj_to_dev(kobj);
struct xe_gt *gt = dev_to_gt(dev);
- return sysfs_emit(buff, "%u\n", !!read_status(gt));
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, U32_MAX));
}
static struct kobj_attribute attr_status = __ATTR_RO(status);
--
2.51.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v3 4/8] drm/xe/gt_throttle: Add throttle_to_gt()
2025-10-29 23:45 [PATCH v3 0/8] drm/xe: CRI support in gt_throttle + refactors Lucas De Marchi
` (2 preceding siblings ...)
2025-10-29 23:45 ` [PATCH v3 3/8] drm/xe/gt_throttle: Always read and mask Lucas De Marchi
@ 2025-10-29 23:45 ` Lucas De Marchi
2025-10-29 23:45 ` [PATCH v3 5/8] drm/xe/gt_throttle: Tidy up attribute definition Lucas De Marchi
` (3 subsequent siblings)
7 siblings, 0 replies; 17+ messages in thread
From: Lucas De Marchi @ 2025-10-29 23:45 UTC (permalink / raw)
To: intel-xe; +Cc: Lucas De Marchi, Raag Jadav
Reduce boilerplate code by adding a helper to go directly from the
throttle kobject to the gt. Note that there's already a kobj_to_gt(),
but that actually converts our kobj_gt object to gt.
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
drivers/gpu/drm/xe/xe_gt_throttle.c | 65 ++++++++++++++-----------------------
1 file changed, 25 insertions(+), 40 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index f743444f641a8..e08023c5e77b6 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -48,12 +48,16 @@
* device/gt#/freq0/throttle/reason_psys_crit - Frequency throttle due to PSYS critical
*/
-static struct xe_gt *
-dev_to_gt(struct device *dev)
+static struct xe_gt *dev_to_gt(struct device *dev)
{
return kobj_to_gt(dev->kobj.parent);
}
+static struct xe_gt *throttle_to_gt(struct kobject *kobj)
+{
+ return dev_to_gt(kobj_to_dev(kobj));
+}
+
u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
@@ -85,8 +89,7 @@ static bool is_throttled_by(struct xe_gt *gt, u32 mask)
static ssize_t status_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, U32_MAX));
}
@@ -95,8 +98,7 @@ static struct kobj_attribute attr_status = __ATTR_RO(status);
static ssize_t reason_pl1_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, POWER_LIMIT_1_MASK));
}
@@ -105,8 +107,7 @@ static struct kobj_attribute attr_reason_pl1 = __ATTR_RO(reason_pl1);
static ssize_t reason_pl2_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, POWER_LIMIT_2_MASK));
}
@@ -115,8 +116,7 @@ static struct kobj_attribute attr_reason_pl2 = __ATTR_RO(reason_pl2);
static ssize_t reason_pl4_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, POWER_LIMIT_4_MASK));
}
@@ -125,8 +125,7 @@ static struct kobj_attribute attr_reason_pl4 = __ATTR_RO(reason_pl4);
static ssize_t reason_thermal_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, THERMAL_LIMIT_MASK));
}
@@ -135,8 +134,7 @@ static struct kobj_attribute attr_reason_thermal = __ATTR_RO(reason_thermal);
static ssize_t reason_soc_thermal_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, SOC_THERMAL_LIMIT_MASK));
}
@@ -145,8 +143,7 @@ static struct kobj_attribute attr_reason_soc_thermal = __ATTR_RO(reason_soc_ther
static ssize_t reason_prochot_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PROCHOT_MASK));
}
@@ -155,8 +152,7 @@ static struct kobj_attribute attr_reason_prochot = __ATTR_RO(reason_prochot);
static ssize_t reason_ratl_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, RATL_MASK));
}
@@ -165,8 +161,7 @@ static struct kobj_attribute attr_reason_ratl = __ATTR_RO(reason_ratl);
static ssize_t reason_vr_thermalert_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, VR_THERMALERT_MASK));
}
@@ -175,8 +170,7 @@ static struct kobj_attribute attr_reason_vr_thermalert = __ATTR_RO(reason_vr_the
static ssize_t reason_soc_avg_thermal_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, SOC_AVG_THERMAL_MASK));
}
@@ -185,8 +179,7 @@ static struct kobj_attribute attr_reason_soc_avg_thermal = __ATTR_RO(reason_soc_
static ssize_t reason_vr_tdc_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, VR_TDC_MASK));
}
@@ -195,8 +188,7 @@ static struct kobj_attribute attr_reason_vr_tdc = __ATTR_RO(reason_vr_tdc);
static ssize_t reason_fastvmode_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, FASTVMODE_MASK));
}
@@ -205,8 +197,7 @@ static struct kobj_attribute attr_reason_fastvmode = __ATTR_RO(reason_fastvmode)
static ssize_t reason_mem_thermal_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, MEM_THERMAL_MASK));
}
@@ -215,8 +206,7 @@ static struct kobj_attribute attr_reason_mem_thermal = __ATTR_RO(reason_mem_ther
static ssize_t reason_vr_thermal_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, VR_THERMAL_MASK));
}
@@ -225,8 +215,7 @@ static struct kobj_attribute attr_reason_vr_thermal = __ATTR_RO(reason_vr_therma
static ssize_t reason_iccmax_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, ICCMAX_MASK));
}
@@ -235,8 +224,7 @@ static struct kobj_attribute attr_reason_iccmax = __ATTR_RO(reason_iccmax);
static ssize_t reason_psys_pl1_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PSYS_PL1_MASK));
}
@@ -245,8 +233,7 @@ static struct kobj_attribute attr_reason_psys_pl1 = __ATTR_RO(reason_psys_pl1);
static ssize_t reason_psys_pl2_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PSYS_PL2_MASK));
}
@@ -255,8 +242,7 @@ static struct kobj_attribute attr_reason_psys_pl2 = __ATTR_RO(reason_psys_pl2);
static ssize_t reason_p0_freq_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, P0_FREQ_MASK));
}
@@ -265,8 +251,7 @@ static struct kobj_attribute attr_reason_p0_freq = __ATTR_RO(reason_p0_freq);
static ssize_t reason_psys_crit_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
+ struct xe_gt *gt = throttle_to_gt(kobj);
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PSYS_CRIT_MASK));
}
--
2.51.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v3 5/8] drm/xe/gt_throttle: Tidy up attribute definition
2025-10-29 23:45 [PATCH v3 0/8] drm/xe: CRI support in gt_throttle + refactors Lucas De Marchi
` (3 preceding siblings ...)
2025-10-29 23:45 ` [PATCH v3 4/8] drm/xe/gt_throttle: Add throttle_to_gt() Lucas De Marchi
@ 2025-10-29 23:45 ` Lucas De Marchi
2025-10-29 23:45 ` [PATCH v3 6/8] drm/xe: Improve freq and throttle documentation Lucas De Marchi
` (2 subsequent siblings)
7 siblings, 0 replies; 17+ messages in thread
From: Lucas De Marchi @ 2025-10-29 23:45 UTC (permalink / raw)
To: intel-xe; +Cc: Lucas De Marchi, Raag Jadav
Move the attribute definitions to be grouped together rather than near
the show() function: checkpatch keeps complaining about the missing
newline when defining new attributes and it reads better to group
everything, which should match e.g. the xe_pmu.c style.
While grouping them, also define a THROTTLE_ATTR_RO(), similar to
DEVICE_ATTR_RO(), and use it to define all attributes. This makes it
shorter and with a familiar syntax.
Finally, during the cri_throttle_attrs[] array definition, also
highlight what's coming from common attributes and what is CRI-specific.
These 3 things could be done as separate commits, but they are all about
the same thing: reduce the attribute definition verbosity and are very
simple and mechanical.
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
v2:
- Reorder attribute definition to match cri_throttle_attrs array (Raag)
---
drivers/gpu/drm/xe/xe_gt_throttle.c | 55 +++++++++++++++++++++----------------
1 file changed, 31 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index e08023c5e77b6..b5317cf714e2e 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -93,7 +93,6 @@ static ssize_t status_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, U32_MAX));
}
-static struct kobj_attribute attr_status = __ATTR_RO(status);
static ssize_t reason_pl1_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -102,7 +101,6 @@ static ssize_t reason_pl1_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, POWER_LIMIT_1_MASK));
}
-static struct kobj_attribute attr_reason_pl1 = __ATTR_RO(reason_pl1);
static ssize_t reason_pl2_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -111,7 +109,6 @@ static ssize_t reason_pl2_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, POWER_LIMIT_2_MASK));
}
-static struct kobj_attribute attr_reason_pl2 = __ATTR_RO(reason_pl2);
static ssize_t reason_pl4_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -120,7 +117,6 @@ static ssize_t reason_pl4_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, POWER_LIMIT_4_MASK));
}
-static struct kobj_attribute attr_reason_pl4 = __ATTR_RO(reason_pl4);
static ssize_t reason_thermal_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -129,7 +125,6 @@ static ssize_t reason_thermal_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, THERMAL_LIMIT_MASK));
}
-static struct kobj_attribute attr_reason_thermal = __ATTR_RO(reason_thermal);
static ssize_t reason_soc_thermal_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -138,7 +133,6 @@ static ssize_t reason_soc_thermal_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, SOC_THERMAL_LIMIT_MASK));
}
-static struct kobj_attribute attr_reason_soc_thermal = __ATTR_RO(reason_soc_thermal);
static ssize_t reason_prochot_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -147,7 +141,6 @@ static ssize_t reason_prochot_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PROCHOT_MASK));
}
-static struct kobj_attribute attr_reason_prochot = __ATTR_RO(reason_prochot);
static ssize_t reason_ratl_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -156,7 +149,6 @@ static ssize_t reason_ratl_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, RATL_MASK));
}
-static struct kobj_attribute attr_reason_ratl = __ATTR_RO(reason_ratl);
static ssize_t reason_vr_thermalert_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -165,7 +157,6 @@ static ssize_t reason_vr_thermalert_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, VR_THERMALERT_MASK));
}
-static struct kobj_attribute attr_reason_vr_thermalert = __ATTR_RO(reason_vr_thermalert);
static ssize_t reason_soc_avg_thermal_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -174,7 +165,6 @@ static ssize_t reason_soc_avg_thermal_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, SOC_AVG_THERMAL_MASK));
}
-static struct kobj_attribute attr_reason_soc_avg_thermal = __ATTR_RO(reason_soc_avg_thermal);
static ssize_t reason_vr_tdc_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -183,7 +173,6 @@ static ssize_t reason_vr_tdc_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, VR_TDC_MASK));
}
-static struct kobj_attribute attr_reason_vr_tdc = __ATTR_RO(reason_vr_tdc);
static ssize_t reason_fastvmode_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -192,7 +181,6 @@ static ssize_t reason_fastvmode_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, FASTVMODE_MASK));
}
-static struct kobj_attribute attr_reason_fastvmode = __ATTR_RO(reason_fastvmode);
static ssize_t reason_mem_thermal_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -201,7 +189,6 @@ static ssize_t reason_mem_thermal_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, MEM_THERMAL_MASK));
}
-static struct kobj_attribute attr_reason_mem_thermal = __ATTR_RO(reason_mem_thermal);
static ssize_t reason_vr_thermal_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -210,7 +197,6 @@ static ssize_t reason_vr_thermal_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, VR_THERMAL_MASK));
}
-static struct kobj_attribute attr_reason_vr_thermal = __ATTR_RO(reason_vr_thermal);
static ssize_t reason_iccmax_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -219,7 +205,6 @@ static ssize_t reason_iccmax_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, ICCMAX_MASK));
}
-static struct kobj_attribute attr_reason_iccmax = __ATTR_RO(reason_iccmax);
static ssize_t reason_psys_pl1_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -228,7 +213,6 @@ static ssize_t reason_psys_pl1_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PSYS_PL1_MASK));
}
-static struct kobj_attribute attr_reason_psys_pl1 = __ATTR_RO(reason_psys_pl1);
static ssize_t reason_psys_pl2_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -237,7 +221,6 @@ static ssize_t reason_psys_pl2_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PSYS_PL2_MASK));
}
-static struct kobj_attribute attr_reason_psys_pl2 = __ATTR_RO(reason_psys_pl2);
static ssize_t reason_p0_freq_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -246,7 +229,6 @@ static ssize_t reason_p0_freq_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, P0_FREQ_MASK));
}
-static struct kobj_attribute attr_reason_p0_freq = __ATTR_RO(reason_p0_freq);
static ssize_t reason_psys_crit_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
@@ -255,7 +237,19 @@ static ssize_t reason_psys_crit_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PSYS_CRIT_MASK));
}
-static struct kobj_attribute attr_reason_psys_crit = __ATTR_RO(reason_psys_crit);
+
+#define THROTTLE_ATTR_RO(name) \
+ struct kobj_attribute attr_##name = __ATTR_RO(name)
+
+static THROTTLE_ATTR_RO(status);
+static THROTTLE_ATTR_RO(reason_pl1);
+static THROTTLE_ATTR_RO(reason_pl2);
+static THROTTLE_ATTR_RO(reason_pl4);
+static THROTTLE_ATTR_RO(reason_thermal);
+static THROTTLE_ATTR_RO(reason_prochot);
+static THROTTLE_ATTR_RO(reason_ratl);
+static THROTTLE_ATTR_RO(reason_vr_thermalert);
+static THROTTLE_ATTR_RO(reason_vr_tdc);
static struct attribute *throttle_attrs[] = {
&attr_status.attr,
@@ -270,19 +264,32 @@ static struct attribute *throttle_attrs[] = {
NULL
};
+static THROTTLE_ATTR_RO(reason_vr_thermal);
+static THROTTLE_ATTR_RO(reason_soc_thermal);
+static THROTTLE_ATTR_RO(reason_mem_thermal);
+static THROTTLE_ATTR_RO(reason_iccmax);
+static THROTTLE_ATTR_RO(reason_soc_avg_thermal);
+static THROTTLE_ATTR_RO(reason_fastvmode);
+static THROTTLE_ATTR_RO(reason_psys_pl1);
+static THROTTLE_ATTR_RO(reason_psys_pl2);
+static THROTTLE_ATTR_RO(reason_p0_freq);
+static THROTTLE_ATTR_RO(reason_psys_crit);
+
static struct attribute *cri_throttle_attrs[] = {
+ /* Common */
&attr_status.attr,
+ &attr_reason_pl1.attr,
+ &attr_reason_pl2.attr,
+ &attr_reason_pl4.attr,
&attr_reason_prochot.attr,
+ &attr_reason_ratl.attr,
+ /* CRI */
+ &attr_reason_vr_thermal.attr,
&attr_reason_soc_thermal.attr,
&attr_reason_mem_thermal.attr,
- &attr_reason_vr_thermal.attr,
&attr_reason_iccmax.attr,
- &attr_reason_ratl.attr,
&attr_reason_soc_avg_thermal.attr,
&attr_reason_fastvmode.attr,
- &attr_reason_pl4.attr,
- &attr_reason_pl1.attr,
- &attr_reason_pl2.attr,
&attr_reason_psys_pl1.attr,
&attr_reason_psys_pl2.attr,
&attr_reason_p0_freq.attr,
--
2.51.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v3 6/8] drm/xe: Improve freq and throttle documentation
2025-10-29 23:45 [PATCH v3 0/8] drm/xe: CRI support in gt_throttle + refactors Lucas De Marchi
` (4 preceding siblings ...)
2025-10-29 23:45 ` [PATCH v3 5/8] drm/xe/gt_throttle: Tidy up attribute definition Lucas De Marchi
@ 2025-10-29 23:45 ` Lucas De Marchi
2025-10-29 23:45 ` [PATCH v3 7/8] drm/xe/gt_throttle: Drop individual show functions Lucas De Marchi
2025-10-29 23:45 ` [PATCH v3 8/8] drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons Lucas De Marchi
7 siblings, 0 replies; 17+ messages in thread
From: Lucas De Marchi @ 2025-10-29 23:45 UTC (permalink / raw)
To: intel-xe; +Cc: Lucas De Marchi, Raag Jadav
Add xe_gt_throttle under the "GT Frequency Management" and improve the
narrative making sure the documentation for both *_freq and throttle/*
attributes follow the same style.
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
Documentation/gpu/xe/xe_gt_freq.rst | 3 ++
drivers/gpu/drm/xe/xe_gt_freq.c | 30 ++++++++++---------
drivers/gpu/drm/xe/xe_gt_throttle.c | 60 ++++++++++++++++++++-----------------
3 files changed, 52 insertions(+), 41 deletions(-)
diff --git a/Documentation/gpu/xe/xe_gt_freq.rst b/Documentation/gpu/xe/xe_gt_freq.rst
index c0811200e3275..182d6aabeee18 100644
--- a/Documentation/gpu/xe/xe_gt_freq.rst
+++ b/Documentation/gpu/xe/xe_gt_freq.rst
@@ -7,6 +7,9 @@ Xe GT Frequency Management
.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_freq.c
:doc: Xe GT Frequency Management
+.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_throttle.c
+ :doc: Xe GT Throttle
+
Internal API
============
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index e88f113226bca..849ea6c86e8e2 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -29,24 +29,26 @@
* PCODE is the ultimate decision maker of the actual running frequency, based
* on thermal and other running conditions.
*
- * Xe's Freq provides a sysfs API for frequency management:
+ * Xe's Freq provides a sysfs API for frequency management under
+ * ``<device>/tile#/gt#/freq0/`` directory.
*
- * device/tile#/gt#/freq0/<item>_freq *read-only* files:
+ * **Read-only** attributes:
*
- * - act_freq: The actual resolved frequency decided by PCODE.
- * - cur_freq: The current one requested by GuC PC to the PCODE.
- * - rpn_freq: The Render Performance (RP) N level, which is the minimal one.
- * - rpa_freq: The Render Performance (RP) A level, which is the achievable one.
- * Calculated by PCODE at runtime based on multiple running conditions
- * - rpe_freq: The Render Performance (RP) E level, which is the efficient one.
- * Calculated by PCODE at runtime based on multiple running conditions
- * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one.
+ * - ``act_freq``: The actual resolved frequency decided by PCODE.
+ * - ``cur_freq``: The current one requested by GuC PC to the PCODE.
+ * - ``rpn_freq``: The Render Performance (RP) N level, which is the minimal one.
+ * - ``rpa_freq``: The Render Performance (RP) A level, which is the achievable one.
+ * Calculated by PCODE at runtime based on multiple running conditions
+ * - ``rpe_freq``: The Render Performance (RP) E level, which is the efficient one.
+ * Calculated by PCODE at runtime based on multiple running conditions
+ * - ``rp0_freq``: The Render Performance (RP) 0 level, which is the maximum one.
*
- * device/tile#/gt#/freq0/<item>_freq *read-write* files:
+ * **Read-write** attributes:
*
- * - min_freq: Min frequency request.
- * - max_freq: Max frequency request.
- * If max <= min, then freq_min becomes a fixed frequency request.
+ * - ``min_freq``: Min frequency request.
+ * - ``max_freq``: Max frequency request.
+ * If max <= min, then freq_min becomes a fixed frequency
+ * request.
*/
static struct xe_guc_pc *
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index b5317cf714e2e..8647d2c997347 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -17,35 +17,41 @@
/**
* DOC: Xe GT Throttle
*
- * Provides sysfs entries and other helpers for frequency throttle reasons in GT
- *
- * device/gt#/freq0/throttle/status - Overall status
- * device/gt#/freq0/throttle/reason_pl1 - Frequency throttle due to PL1
- * device/gt#/freq0/throttle/reason_pl2 - Frequency throttle due to PL2
- * device/gt#/freq0/throttle/reason_pl4 - Frequency throttle due to PL4, Iccmax etc.
- * device/gt#/freq0/throttle/reason_thermal - Frequency throttle due to thermal
- * device/gt#/freq0/throttle/reason_prochot - Frequency throttle due to prochot
- * device/gt#/freq0/throttle/reason_ratl - Frequency throttle due to RATL
- * device/gt#/freq0/throttle/reason_vr_thermalert - Frequency throttle due to VR THERMALERT
- * device/gt#/freq0/throttle/reason_vr_tdc - Frequency throttle due to VR TDC
+ * The GT frequency may be throttled by hardware/firmware for various reasons
+ * that are provided through attributes under the ``freq0/throttle/`` directory.
+ * Their availability depend on the platform and some may not be visible if that
+ * reason is not available.
*
* The following attributes are available on Crescent Island platform:
- * device/gt#/freq0/throttle/status - Overall throttle status
- * device/gt#/freq0/throttle/reason_pl1 - Frequency throttle due to package PL1
- * device/gt#/freq0/throttle/reason_pl2 - Frequency throttle due to package PL2
- * device/gt#/freq0/throttle/reason_pl4 - Frequency throttle due to PL4
- * device/gt#/freq0/throttle/reason_prochot - Frequency throttle due to prochot
- * device/gt#/freq0/throttle/reason_soc_thermal - Frequency throttle due to SoC thermal
- * device/gt#/freq0/throttle/reason_mem_thermal - Frequency throttle due to memory thermal
- * device/gt#/freq0/throttle/reason_vr_thermal - Frequency throttle due to VR thermal
- * device/gt#/freq0/throttle/reason_iccmax - Frequency throttle due to ICCMAX
- * device/gt#/freq0/throttle/reason_ratl - Frequency throttle due to RATL thermal algorithm
- * device/gt#/freq0/throttle/reason_soc_avg_thermal - Frequency throttle due to SoC average temp
- * device/gt#/freq0/throttle/reason_fastvmode - Frequency throttle due to VR is hitting FastVMode
- * device/gt#/freq0/throttle/reason_psys_pl1 - Frequency throttle due to PSYS PL1
- * device/gt#/freq0/throttle/reason_psys_pl2 - Frequency throttle due to PSYS PL2
- * device/gt#/freq0/throttle/reason_p0_freq - Frequency throttle due to P0 frequency
- * device/gt#/freq0/throttle/reason_psys_crit - Frequency throttle due to PSYS critical
+ *
+ * - ``status``: Overall throttle status
+ * - ``reason_pl1``: package PL1
+ * - ``reason_pl2``: package PL2
+ * - ``reason_pl4``: package PL4
+ * - ``reason_prochot``: prochot
+ * - ``reason_soc_thermal``: SoC thermal
+ * - ``reason_mem_thermal``: Memory thermal
+ * - ``reason_vr_thermal``: VR thermal
+ * - ``reason_iccmax``: ICCMAX
+ * - ``reason_ratl``: RATL thermal algorithm
+ * - ``reason_soc_avg_thermal``: SoC average temp
+ * - ``reason_fastvmode``: VR is hitting FastVMode
+ * - ``reason_psys_pl1``: PSYS PL1
+ * - ``reason_psys_pl2``: PSYS PL2
+ * - ``reason_p0_freq``: P0 frequency
+ * - ``reason_psys_crit``: PSYS critical
+ *
+ * Other platforms support the following reasons:
+ *
+ * - ``status``: Overall status
+ * - ``reason_pl1``: package PL1
+ * - ``reason_pl2``: package PL2
+ * - ``reason_pl4``: package PL4, Iccmax etc.
+ * - ``reason_thermal``: thermal
+ * - ``reason_prochot``: prochot
+ * - ``reason_ratl``: RATL hermal algorithm
+ * - ``reason_vr_thermalert``: VR THERMALERT
+ * - ``reason_vr_tdc``: VR TDC
*/
static struct xe_gt *dev_to_gt(struct device *dev)
--
2.51.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v3 7/8] drm/xe/gt_throttle: Drop individual show functions
2025-10-29 23:45 [PATCH v3 0/8] drm/xe: CRI support in gt_throttle + refactors Lucas De Marchi
` (5 preceding siblings ...)
2025-10-29 23:45 ` [PATCH v3 6/8] drm/xe: Improve freq and throttle documentation Lucas De Marchi
@ 2025-10-29 23:45 ` Lucas De Marchi
2025-10-29 23:45 ` [PATCH v3 8/8] drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons Lucas De Marchi
7 siblings, 0 replies; 17+ messages in thread
From: Lucas De Marchi @ 2025-10-29 23:45 UTC (permalink / raw)
To: intel-xe; +Cc: Lucas De Marchi, Raag Jadav
They are all doing the same thing with the mask being the param. Just
declare our own attribute to store the mask and provide a single
function.
Another common pattern is to define the show function in the macro,
however on follow up work the mask may be used for returning more
information, so it'd need to be stored in any case.
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
drivers/gpu/drm/xe/xe_gt_throttle.c | 254 +++++++++---------------------------
1 file changed, 62 insertions(+), 192 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index 8647d2c997347..fa7068aac3344 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -54,6 +54,11 @@
* - ``reason_vr_tdc``: VR TDC
*/
+struct throttle_attribute {
+ struct kobj_attribute attr;
+ u32 mask;
+};
+
static struct xe_gt *dev_to_gt(struct device *dev)
{
return kobj_to_gt(dev->kobj.parent);
@@ -64,6 +69,11 @@ static struct xe_gt *throttle_to_gt(struct kobject *kobj)
return dev_to_gt(kobj_to_dev(kobj));
}
+static struct throttle_attribute *kobj_attribute_to_throttle(struct kobj_attribute *attr)
+{
+ return container_of(attr, struct throttle_attribute, attr);
+}
+
u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
@@ -92,214 +102,74 @@ static bool is_throttled_by(struct xe_gt *gt, u32 mask)
return xe_gt_throttle_get_limit_reasons(gt) & mask;
}
-static ssize_t status_show(struct kobject *kobj,
+static ssize_t reason_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
+ struct throttle_attribute *ta = kobj_attribute_to_throttle(attr);
struct xe_gt *gt = throttle_to_gt(kobj);
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, U32_MAX));
-}
-
-static ssize_t reason_pl1_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, POWER_LIMIT_1_MASK));
-}
-
-static ssize_t reason_pl2_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, POWER_LIMIT_2_MASK));
-}
-
-static ssize_t reason_pl4_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, POWER_LIMIT_4_MASK));
-}
-
-static ssize_t reason_thermal_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, THERMAL_LIMIT_MASK));
-}
-
-static ssize_t reason_soc_thermal_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, SOC_THERMAL_LIMIT_MASK));
-}
-
-static ssize_t reason_prochot_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PROCHOT_MASK));
-}
-
-static ssize_t reason_ratl_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, RATL_MASK));
-}
-
-static ssize_t reason_vr_thermalert_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, VR_THERMALERT_MASK));
-}
-
-static ssize_t reason_soc_avg_thermal_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, SOC_AVG_THERMAL_MASK));
-}
-
-static ssize_t reason_vr_tdc_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, VR_TDC_MASK));
-}
-
-static ssize_t reason_fastvmode_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, FASTVMODE_MASK));
-}
-
-static ssize_t reason_mem_thermal_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, MEM_THERMAL_MASK));
-}
-
-static ssize_t reason_vr_thermal_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, VR_THERMAL_MASK));
-}
-
-static ssize_t reason_iccmax_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, ICCMAX_MASK));
-}
-
-static ssize_t reason_psys_pl1_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PSYS_PL1_MASK));
-}
-
-static ssize_t reason_psys_pl2_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PSYS_PL2_MASK));
-}
-
-static ssize_t reason_p0_freq_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, P0_FREQ_MASK));
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, ta->mask));
}
-static ssize_t reason_psys_crit_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct xe_gt *gt = throttle_to_gt(kobj);
-
- return sysfs_emit(buff, "%u\n", is_throttled_by(gt, PSYS_CRIT_MASK));
-}
-
-#define THROTTLE_ATTR_RO(name) \
- struct kobj_attribute attr_##name = __ATTR_RO(name)
+#define THROTTLE_ATTR_RO(name, _mask) \
+ struct throttle_attribute attr_##name = { \
+ .attr = __ATTR(name, 0444, reason_show, NULL), \
+ .mask = _mask, \
+ }
-static THROTTLE_ATTR_RO(status);
-static THROTTLE_ATTR_RO(reason_pl1);
-static THROTTLE_ATTR_RO(reason_pl2);
-static THROTTLE_ATTR_RO(reason_pl4);
-static THROTTLE_ATTR_RO(reason_thermal);
-static THROTTLE_ATTR_RO(reason_prochot);
-static THROTTLE_ATTR_RO(reason_ratl);
-static THROTTLE_ATTR_RO(reason_vr_thermalert);
-static THROTTLE_ATTR_RO(reason_vr_tdc);
+static THROTTLE_ATTR_RO(status, U32_MAX);
+static THROTTLE_ATTR_RO(reason_pl1, POWER_LIMIT_1_MASK);
+static THROTTLE_ATTR_RO(reason_pl2, POWER_LIMIT_2_MASK);
+static THROTTLE_ATTR_RO(reason_pl4, POWER_LIMIT_4_MASK);
+static THROTTLE_ATTR_RO(reason_thermal, THERMAL_LIMIT_MASK);
+static THROTTLE_ATTR_RO(reason_prochot, PROCHOT_MASK);
+static THROTTLE_ATTR_RO(reason_ratl, RATL_MASK);
+static THROTTLE_ATTR_RO(reason_vr_thermalert, VR_THERMALERT_MASK);
+static THROTTLE_ATTR_RO(reason_vr_tdc, VR_TDC_MASK);
static struct attribute *throttle_attrs[] = {
- &attr_status.attr,
- &attr_reason_pl1.attr,
- &attr_reason_pl2.attr,
- &attr_reason_pl4.attr,
- &attr_reason_thermal.attr,
- &attr_reason_prochot.attr,
- &attr_reason_ratl.attr,
- &attr_reason_vr_thermalert.attr,
- &attr_reason_vr_tdc.attr,
+ &attr_status.attr.attr,
+ &attr_reason_pl1.attr.attr,
+ &attr_reason_pl2.attr.attr,
+ &attr_reason_pl4.attr.attr,
+ &attr_reason_thermal.attr.attr,
+ &attr_reason_prochot.attr.attr,
+ &attr_reason_ratl.attr.attr,
+ &attr_reason_vr_thermalert.attr.attr,
+ &attr_reason_vr_tdc.attr.attr,
NULL
};
-static THROTTLE_ATTR_RO(reason_vr_thermal);
-static THROTTLE_ATTR_RO(reason_soc_thermal);
-static THROTTLE_ATTR_RO(reason_mem_thermal);
-static THROTTLE_ATTR_RO(reason_iccmax);
-static THROTTLE_ATTR_RO(reason_soc_avg_thermal);
-static THROTTLE_ATTR_RO(reason_fastvmode);
-static THROTTLE_ATTR_RO(reason_psys_pl1);
-static THROTTLE_ATTR_RO(reason_psys_pl2);
-static THROTTLE_ATTR_RO(reason_p0_freq);
-static THROTTLE_ATTR_RO(reason_psys_crit);
+static THROTTLE_ATTR_RO(reason_vr_thermal, VR_THERMAL_MASK);
+static THROTTLE_ATTR_RO(reason_soc_thermal, SOC_THERMAL_LIMIT_MASK);
+static THROTTLE_ATTR_RO(reason_mem_thermal, MEM_THERMAL_MASK);
+static THROTTLE_ATTR_RO(reason_iccmax, ICCMAX_MASK);
+static THROTTLE_ATTR_RO(reason_soc_avg_thermal, SOC_AVG_THERMAL_MASK);
+static THROTTLE_ATTR_RO(reason_fastvmode, FASTVMODE_MASK);
+static THROTTLE_ATTR_RO(reason_psys_pl1, PSYS_PL1_MASK);
+static THROTTLE_ATTR_RO(reason_psys_pl2, PSYS_PL2_MASK);
+static THROTTLE_ATTR_RO(reason_p0_freq, P0_FREQ_MASK);
+static THROTTLE_ATTR_RO(reason_psys_crit, PSYS_CRIT_MASK);
static struct attribute *cri_throttle_attrs[] = {
/* Common */
- &attr_status.attr,
- &attr_reason_pl1.attr,
- &attr_reason_pl2.attr,
- &attr_reason_pl4.attr,
- &attr_reason_prochot.attr,
- &attr_reason_ratl.attr,
+ &attr_status.attr.attr,
+ &attr_reason_pl1.attr.attr,
+ &attr_reason_pl2.attr.attr,
+ &attr_reason_pl4.attr.attr,
+ &attr_reason_prochot.attr.attr,
+ &attr_reason_ratl.attr.attr,
/* CRI */
- &attr_reason_vr_thermal.attr,
- &attr_reason_soc_thermal.attr,
- &attr_reason_mem_thermal.attr,
- &attr_reason_iccmax.attr,
- &attr_reason_soc_avg_thermal.attr,
- &attr_reason_fastvmode.attr,
- &attr_reason_psys_pl1.attr,
- &attr_reason_psys_pl2.attr,
- &attr_reason_p0_freq.attr,
- &attr_reason_psys_crit.attr,
+ &attr_reason_vr_thermal.attr.attr,
+ &attr_reason_soc_thermal.attr.attr,
+ &attr_reason_mem_thermal.attr.attr,
+ &attr_reason_iccmax.attr.attr,
+ &attr_reason_soc_avg_thermal.attr.attr,
+ &attr_reason_fastvmode.attr.attr,
+ &attr_reason_psys_pl1.attr.attr,
+ &attr_reason_psys_pl2.attr.attr,
+ &attr_reason_p0_freq.attr.attr,
+ &attr_reason_psys_crit.attr.attr,
NULL
};
--
2.51.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v3 8/8] drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons
2025-10-29 23:45 [PATCH v3 0/8] drm/xe: CRI support in gt_throttle + refactors Lucas De Marchi
` (6 preceding siblings ...)
2025-10-29 23:45 ` [PATCH v3 7/8] drm/xe/gt_throttle: Drop individual show functions Lucas De Marchi
@ 2025-10-29 23:45 ` Lucas De Marchi
2025-10-30 9:53 ` Raag Jadav
2025-10-30 15:42 ` Rodrigo Vivi
7 siblings, 2 replies; 17+ messages in thread
From: Lucas De Marchi @ 2025-10-29 23:45 UTC (permalink / raw)
To: intel-xe; +Cc: Lucas De Marchi, Raag Jadav, Rodrigo Vivi
It's currently not possible to safely monitor if there's throttling
happening and what are the reasons. The approach of reading the status
and then reading the reasons is not reliable as by the time sysadmin
reads the reason, the throttling could not be happening anymore.
Previous tentative to fix that[1] was breaking the ABI and potentially
sysadmin's scripts. This takes a different approach of adding and
documenting the additional attribute. It's still valuable, though
redundant, to provide the simpler 0/1 interface.
In order to avoid userspace knowledge on the bitmask meaning and to be
able to maintain the kernel side in sync with possible changes in
future, just walk the attribute group and check what are the masks that
match the value read.
[1] https://lore.kernel.org/intel-xe/20241025092238.167042-1-raag.jadav@intel.com/
Cc: Raag Jadav <raag.jadav@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
- v2: Use space as separator (Rodrigo)
---
drivers/gpu/drm/xe/xe_gt_throttle.c | 52 +++++++++++++++++++++++++++++++++++--
1 file changed, 50 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index fa7068aac3344..ca45aea8c17a6 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -22,9 +22,15 @@
* Their availability depend on the platform and some may not be visible if that
* reason is not available.
*
+ * The ``status_reasons`` attribute can be used by sysadmin monitoring all
+ * possible reasons for throttling and reporting them. It's preferred over
+ * monitoring ``status`` and then reading the reason both for simplicity and to
+ * avoid TOCTOU (time-of-check to time-of-use).
+ *
* The following attributes are available on Crescent Island platform:
*
- * - ``status``: Overall throttle status
+ * - ``status``: Overall throttle status (0: no throttling, 1: throttling)
+ * - ``status_reasons``: All reasons causing throttling separated by newline.
* - ``reason_pl1``: package PL1
* - ``reason_pl2``: package PL2
* - ``reason_pl4``: package PL4
@@ -43,7 +49,8 @@
*
* Other platforms support the following reasons:
*
- * - ``status``: Overall status
+ * - ``status``: Overall throttle status (0: no throttling, 1: throttling)
+ * - ``status_reasons``: All reasons causing throttling separated by newline.
* - ``reason_pl1``: package PL1
* - ``reason_pl2``: package PL2
* - ``reason_pl4``: package PL4, Iccmax etc.
@@ -111,12 +118,51 @@ static ssize_t reason_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", is_throttled_by(gt, ta->mask));
}
+static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe);
+
+static ssize_t status_reasons_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
+{
+ struct xe_gt *gt = throttle_to_gt(kobj);
+ struct xe_device *xe = gt_to_xe(gt);
+ const struct attribute_group *group;
+ struct attribute **pother;
+ ssize_t ret = 0;
+ u32 reasons;
+
+ reasons = xe_gt_throttle_get_limit_reasons(gt);
+ group = get_platform_throttle_group(xe);
+
+ for (pother = group->attrs; *pother; pother++) {
+ struct kobj_attribute *kattr = container_of(*pother, struct kobj_attribute, attr);
+ struct throttle_attribute *other_ta = kobj_attribute_to_throttle(kattr);
+
+ if (other_ta->mask != U32_MAX && reasons & other_ta->mask)
+ ret += sysfs_emit_at(buff, ret, "%s ", (*pother)->name);
+ }
+
+ /* Drop extra space from last iteration above */
+ if (ret)
+ ret--;
+
+ ret += sysfs_emit_at(buff, ret, "\n");
+
+ return ret;
+}
+
#define THROTTLE_ATTR_RO(name, _mask) \
struct throttle_attribute attr_##name = { \
.attr = __ATTR(name, 0444, reason_show, NULL), \
.mask = _mask, \
}
+#define THROTTLE_ATTR_RO_FUNC(name, _mask, _show) \
+ struct throttle_attribute attr_##name = { \
+ .attr = __ATTR(name, 0444, _show, NULL), \
+ .mask = _mask, \
+ }
+
+static THROTTLE_ATTR_RO_FUNC(status_reasons, 0, status_reasons_show);
static THROTTLE_ATTR_RO(status, U32_MAX);
static THROTTLE_ATTR_RO(reason_pl1, POWER_LIMIT_1_MASK);
static THROTTLE_ATTR_RO(reason_pl2, POWER_LIMIT_2_MASK);
@@ -128,6 +174,7 @@ static THROTTLE_ATTR_RO(reason_vr_thermalert, VR_THERMALERT_MASK);
static THROTTLE_ATTR_RO(reason_vr_tdc, VR_TDC_MASK);
static struct attribute *throttle_attrs[] = {
+ &attr_status_reasons.attr.attr,
&attr_status.attr.attr,
&attr_reason_pl1.attr.attr,
&attr_reason_pl2.attr.attr,
@@ -153,6 +200,7 @@ static THROTTLE_ATTR_RO(reason_psys_crit, PSYS_CRIT_MASK);
static struct attribute *cri_throttle_attrs[] = {
/* Common */
+ &attr_status_reasons.attr.attr,
&attr_status.attr.attr,
&attr_reason_pl1.attr.attr,
&attr_reason_pl2.attr.attr,
--
2.51.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* Re: [PATCH v3 8/8] drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons
2025-10-29 23:45 ` [PATCH v3 8/8] drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons Lucas De Marchi
@ 2025-10-30 9:53 ` Raag Jadav
2025-10-30 14:55 ` Lucas De Marchi
2025-10-30 15:42 ` Rodrigo Vivi
1 sibling, 1 reply; 17+ messages in thread
From: Raag Jadav @ 2025-10-30 9:53 UTC (permalink / raw)
To: Lucas De Marchi; +Cc: intel-xe, Rodrigo Vivi
On Wed, Oct 29, 2025 at 04:45:10PM -0700, Lucas De Marchi wrote:
> It's currently not possible to safely monitor if there's throttling
> happening and what are the reasons. The approach of reading the status
> and then reading the reasons is not reliable as by the time sysadmin
> reads the reason, the throttling could not be happening anymore.
>
> Previous tentative to fix that[1] was breaking the ABI and potentially
> sysadmin's scripts. This takes a different approach of adding and
> documenting the additional attribute. It's still valuable, though
> redundant, to provide the simpler 0/1 interface.
>
> In order to avoid userspace knowledge on the bitmask meaning and to be
> able to maintain the kernel side in sync with possible changes in
> future, just walk the attribute group and check what are the masks that
> match the value read.
>
> [1] https://lore.kernel.org/intel-xe/20241025092238.167042-1-raag.jadav@intel.com/
...
> +static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe);
> +
> +static ssize_t status_reasons_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buff)
> +{
> + struct xe_gt *gt = throttle_to_gt(kobj);
> + struct xe_device *xe = gt_to_xe(gt);
> + const struct attribute_group *group;
> + struct attribute **pother;
> + ssize_t ret = 0;
> + u32 reasons;
> +
> + reasons = xe_gt_throttle_get_limit_reasons(gt);
> + group = get_platform_throttle_group(xe);
> +
> + for (pother = group->attrs; *pother; pother++) {
> + struct kobj_attribute *kattr = container_of(*pother, struct kobj_attribute, attr);
> + struct throttle_attribute *other_ta = kobj_attribute_to_throttle(kattr);
> +
> + if (other_ta->mask != U32_MAX && reasons & other_ta->mask)
> + ret += sysfs_emit_at(buff, ret, "%s ", (*pother)->name);
Much better.
> + }
> +
> + /* Drop extra space from last iteration above */
> + if (ret)
> + ret--;
> +
> + ret += sysfs_emit_at(buff, ret, "\n");
I went through the documentation again and I couldn't find any rules
related to empty files or whether it is allowed (just thinking out
loud about no throttling cases).
Thoughts?
Raag
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH v3 8/8] drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons
2025-10-30 9:53 ` Raag Jadav
@ 2025-10-30 14:55 ` Lucas De Marchi
2025-10-30 15:47 ` Rodrigo Vivi
0 siblings, 1 reply; 17+ messages in thread
From: Lucas De Marchi @ 2025-10-30 14:55 UTC (permalink / raw)
To: Raag Jadav; +Cc: intel-xe, Rodrigo Vivi
On Thu, Oct 30, 2025 at 10:53:36AM +0100, Raag Jadav wrote:
>On Wed, Oct 29, 2025 at 04:45:10PM -0700, Lucas De Marchi wrote:
>> It's currently not possible to safely monitor if there's throttling
>> happening and what are the reasons. The approach of reading the status
>> and then reading the reasons is not reliable as by the time sysadmin
>> reads the reason, the throttling could not be happening anymore.
>>
>> Previous tentative to fix that[1] was breaking the ABI and potentially
>> sysadmin's scripts. This takes a different approach of adding and
>> documenting the additional attribute. It's still valuable, though
>> redundant, to provide the simpler 0/1 interface.
>>
>> In order to avoid userspace knowledge on the bitmask meaning and to be
>> able to maintain the kernel side in sync with possible changes in
>> future, just walk the attribute group and check what are the masks that
>> match the value read.
>>
>> [1] https://lore.kernel.org/intel-xe/20241025092238.167042-1-raag.jadav@intel.com/
>
>...
>
>> +static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe);
>> +
>> +static ssize_t status_reasons_show(struct kobject *kobj,
>> + struct kobj_attribute *attr, char *buff)
>> +{
>> + struct xe_gt *gt = throttle_to_gt(kobj);
>> + struct xe_device *xe = gt_to_xe(gt);
>> + const struct attribute_group *group;
>> + struct attribute **pother;
>> + ssize_t ret = 0;
>> + u32 reasons;
>> +
>> + reasons = xe_gt_throttle_get_limit_reasons(gt);
>> + group = get_platform_throttle_group(xe);
>> +
>> + for (pother = group->attrs; *pother; pother++) {
>> + struct kobj_attribute *kattr = container_of(*pother, struct kobj_attribute, attr);
>> + struct throttle_attribute *other_ta = kobj_attribute_to_throttle(kattr);
>> +
>> + if (other_ta->mask != U32_MAX && reasons & other_ta->mask)
>> + ret += sysfs_emit_at(buff, ret, "%s ", (*pother)->name);
>
>Much better.
>
>> + }
>> +
>> + /* Drop extra space from last iteration above */
>> + if (ret)
>> + ret--;
>> +
>> + ret += sysfs_emit_at(buff, ret, "\n");
>
>I went through the documentation again and I couldn't find any rules
>related to empty files or whether it is allowed (just thinking out
>loud about no throttling cases).
do you mean if "empty" files are allowed in sysfs? I don't think there's
any problem with that. It's also not empty, it has a newline there ;)
Lucas De Marchi
>
>Thoughts?
>
>Raag
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH v3 8/8] drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons
2025-10-30 14:55 ` Lucas De Marchi
@ 2025-10-30 15:47 ` Rodrigo Vivi
2025-10-30 16:06 ` Raag Jadav
0 siblings, 1 reply; 17+ messages in thread
From: Rodrigo Vivi @ 2025-10-30 15:47 UTC (permalink / raw)
To: Lucas De Marchi; +Cc: Raag Jadav, intel-xe
On Thu, Oct 30, 2025 at 09:55:30AM -0500, Lucas De Marchi wrote:
> On Thu, Oct 30, 2025 at 10:53:36AM +0100, Raag Jadav wrote:
> > On Wed, Oct 29, 2025 at 04:45:10PM -0700, Lucas De Marchi wrote:
> > > It's currently not possible to safely monitor if there's throttling
> > > happening and what are the reasons. The approach of reading the status
> > > and then reading the reasons is not reliable as by the time sysadmin
> > > reads the reason, the throttling could not be happening anymore.
> > >
> > > Previous tentative to fix that[1] was breaking the ABI and potentially
> > > sysadmin's scripts. This takes a different approach of adding and
> > > documenting the additional attribute. It's still valuable, though
> > > redundant, to provide the simpler 0/1 interface.
> > >
> > > In order to avoid userspace knowledge on the bitmask meaning and to be
> > > able to maintain the kernel side in sync with possible changes in
> > > future, just walk the attribute group and check what are the masks that
> > > match the value read.
> > >
> > > [1] https://lore.kernel.org/intel-xe/20241025092238.167042-1-raag.jadav@intel.com/
> >
> > ...
> >
> > > +static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe);
> > > +
> > > +static ssize_t status_reasons_show(struct kobject *kobj,
> > > + struct kobj_attribute *attr, char *buff)
> > > +{
> > > + struct xe_gt *gt = throttle_to_gt(kobj);
> > > + struct xe_device *xe = gt_to_xe(gt);
> > > + const struct attribute_group *group;
> > > + struct attribute **pother;
> > > + ssize_t ret = 0;
> > > + u32 reasons;
> > > +
> > > + reasons = xe_gt_throttle_get_limit_reasons(gt);
> > > + group = get_platform_throttle_group(xe);
> > > +
> > > + for (pother = group->attrs; *pother; pother++) {
> > > + struct kobj_attribute *kattr = container_of(*pother, struct kobj_attribute, attr);
> > > + struct throttle_attribute *other_ta = kobj_attribute_to_throttle(kattr);
> > > +
> > > + if (other_ta->mask != U32_MAX && reasons & other_ta->mask)
> > > + ret += sysfs_emit_at(buff, ret, "%s ", (*pother)->name);
> >
> > Much better.
> >
> > > + }
> > > +
> > > + /* Drop extra space from last iteration above */
> > > + if (ret)
> > > + ret--;
> > > +
> > > + ret += sysfs_emit_at(buff, ret, "\n");
> >
> > I went through the documentation again and I couldn't find any rules
> > related to empty files or whether it is allowed (just thinking out
> > loud about no throttling cases).
>
> do you mean if "empty" files are allowed in sysfs? I don't think there's
> any problem with that. It's also not empty, it has a newline there ;)
alternatively we could print the entire reg in hex format?
But I prefer the text line in this patch.
Nothing against the 'empty' file with or without the new-line,
but perhaps we could consider to track that in the loop
and if none is add we print
if (ret)
ret--;
else
sysfs_emit_at(buff, ret, "none");
and document that above...
>
> Lucas De Marchi
>
> >
> > Thoughts?
> >
> > Raag
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH v3 8/8] drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons
2025-10-30 15:47 ` Rodrigo Vivi
@ 2025-10-30 16:06 ` Raag Jadav
2025-10-30 18:43 ` Lucas De Marchi
0 siblings, 1 reply; 17+ messages in thread
From: Raag Jadav @ 2025-10-30 16:06 UTC (permalink / raw)
To: Rodrigo Vivi; +Cc: Lucas De Marchi, intel-xe
On Thu, Oct 30, 2025 at 11:47:03AM -0400, Rodrigo Vivi wrote:
> On Thu, Oct 30, 2025 at 09:55:30AM -0500, Lucas De Marchi wrote:
> > On Thu, Oct 30, 2025 at 10:53:36AM +0100, Raag Jadav wrote:
> > > On Wed, Oct 29, 2025 at 04:45:10PM -0700, Lucas De Marchi wrote:
> > > > It's currently not possible to safely monitor if there's throttling
> > > > happening and what are the reasons. The approach of reading the status
> > > > and then reading the reasons is not reliable as by the time sysadmin
> > > > reads the reason, the throttling could not be happening anymore.
> > > >
> > > > Previous tentative to fix that[1] was breaking the ABI and potentially
> > > > sysadmin's scripts. This takes a different approach of adding and
> > > > documenting the additional attribute. It's still valuable, though
> > > > redundant, to provide the simpler 0/1 interface.
> > > >
> > > > In order to avoid userspace knowledge on the bitmask meaning and to be
> > > > able to maintain the kernel side in sync with possible changes in
> > > > future, just walk the attribute group and check what are the masks that
> > > > match the value read.
> > > >
> > > > [1] https://lore.kernel.org/intel-xe/20241025092238.167042-1-raag.jadav@intel.com/
> > >
> > > ...
> > >
> > > > +static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe);
> > > > +
> > > > +static ssize_t status_reasons_show(struct kobject *kobj,
Semantically there's much of a 'status' here, so this could simply be
'reasons' (and same for the attribute name).
> > > > + struct kobj_attribute *attr, char *buff)
> > > > +{
> > > > + struct xe_gt *gt = throttle_to_gt(kobj);
> > > > + struct xe_device *xe = gt_to_xe(gt);
> > > > + const struct attribute_group *group;
> > > > + struct attribute **pother;
> > > > + ssize_t ret = 0;
> > > > + u32 reasons;
> > > > +
> > > > + reasons = xe_gt_throttle_get_limit_reasons(gt);
> > > > + group = get_platform_throttle_group(xe);
> > > > +
> > > > + for (pother = group->attrs; *pother; pother++) {
> > > > + struct kobj_attribute *kattr = container_of(*pother, struct kobj_attribute, attr);
> > > > + struct throttle_attribute *other_ta = kobj_attribute_to_throttle(kattr);
> > > > +
> > > > + if (other_ta->mask != U32_MAX && reasons & other_ta->mask)
> > > > + ret += sysfs_emit_at(buff, ret, "%s ", (*pother)->name);
> > >
> > > Much better.
> > >
> > > > + }
> > > > +
> > > > + /* Drop extra space from last iteration above */
> > > > + if (ret)
> > > > + ret--;
> > > > +
> > > > + ret += sysfs_emit_at(buff, ret, "\n");
> > >
> > > I went through the documentation again and I couldn't find any rules
> > > related to empty files or whether it is allowed (just thinking out
> > > loud about no throttling cases).
> >
> > do you mean if "empty" files are allowed in sysfs? I don't think there's
> > any problem with that. It's also not empty, it has a newline there ;)
>
> alternatively we could print the entire reg in hex format?
> But I prefer the text line in this patch.
>
> Nothing against the 'empty' file with or without the new-line,
> but perhaps we could consider to track that in the loop
> and if none is add we print
>
> if (ret)
> ret--;
> else
> sysfs_emit_at(buff, ret, "none");
>
> and document that above...
+1.
PS: A good read[1] if anyone's interested.
[1] https://lwn.net/Articles/378884/
Raag
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH v3 8/8] drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons
2025-10-30 16:06 ` Raag Jadav
@ 2025-10-30 18:43 ` Lucas De Marchi
2025-10-30 19:54 ` Vivi, Rodrigo
0 siblings, 1 reply; 17+ messages in thread
From: Lucas De Marchi @ 2025-10-30 18:43 UTC (permalink / raw)
To: Raag Jadav; +Cc: Rodrigo Vivi, intel-xe
On Thu, Oct 30, 2025 at 05:06:46PM +0100, Raag Jadav wrote:
>On Thu, Oct 30, 2025 at 11:47:03AM -0400, Rodrigo Vivi wrote:
>> On Thu, Oct 30, 2025 at 09:55:30AM -0500, Lucas De Marchi wrote:
>> > On Thu, Oct 30, 2025 at 10:53:36AM +0100, Raag Jadav wrote:
>> > > On Wed, Oct 29, 2025 at 04:45:10PM -0700, Lucas De Marchi wrote:
>> > > > It's currently not possible to safely monitor if there's throttling
>> > > > happening and what are the reasons. The approach of reading the status
>> > > > and then reading the reasons is not reliable as by the time sysadmin
>> > > > reads the reason, the throttling could not be happening anymore.
>> > > >
>> > > > Previous tentative to fix that[1] was breaking the ABI and potentially
>> > > > sysadmin's scripts. This takes a different approach of adding and
>> > > > documenting the additional attribute. It's still valuable, though
>> > > > redundant, to provide the simpler 0/1 interface.
>> > > >
>> > > > In order to avoid userspace knowledge on the bitmask meaning and to be
>> > > > able to maintain the kernel side in sync with possible changes in
>> > > > future, just walk the attribute group and check what are the masks that
>> > > > match the value read.
>> > > >
>> > > > [1] https://lore.kernel.org/intel-xe/20241025092238.167042-1-raag.jadav@intel.com/
>> > >
>> > > ...
>> > >
>> > > > +static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe);
>> > > > +
>> > > > +static ssize_t status_reasons_show(struct kobject *kobj,
>
>Semantically there's much of a 'status' here, so this could simply be
>'reasons' (and same for the attribute name).
>
>> > > > + struct kobj_attribute *attr, char *buff)
>> > > > +{
>> > > > + struct xe_gt *gt = throttle_to_gt(kobj);
>> > > > + struct xe_device *xe = gt_to_xe(gt);
>> > > > + const struct attribute_group *group;
>> > > > + struct attribute **pother;
>> > > > + ssize_t ret = 0;
>> > > > + u32 reasons;
>> > > > +
>> > > > + reasons = xe_gt_throttle_get_limit_reasons(gt);
>> > > > + group = get_platform_throttle_group(xe);
>> > > > +
>> > > > + for (pother = group->attrs; *pother; pother++) {
>> > > > + struct kobj_attribute *kattr = container_of(*pother, struct kobj_attribute, attr);
>> > > > + struct throttle_attribute *other_ta = kobj_attribute_to_throttle(kattr);
>> > > > +
>> > > > + if (other_ta->mask != U32_MAX && reasons & other_ta->mask)
>> > > > + ret += sysfs_emit_at(buff, ret, "%s ", (*pother)->name);
>> > >
>> > > Much better.
>> > >
>> > > > + }
>> > > > +
>> > > > + /* Drop extra space from last iteration above */
>> > > > + if (ret)
>> > > > + ret--;
>> > > > +
>> > > > + ret += sysfs_emit_at(buff, ret, "\n");
>> > >
>> > > I went through the documentation again and I couldn't find any rules
>> > > related to empty files or whether it is allowed (just thinking out
>> > > loud about no throttling cases).
>> >
>> > do you mean if "empty" files are allowed in sysfs? I don't think there's
>> > any problem with that. It's also not empty, it has a newline there ;)
>>
>> alternatively we could print the entire reg in hex format?
>> But I prefer the text line in this patch.
>>
>> Nothing against the 'empty' file with or without the new-line,
>> but perhaps we could consider to track that in the loop
>> and if none is add we print
>>
>> if (ret)
>> ret--;
>> else
>> sysfs_emit_at(buff, ret, "none");
>>
>> and document that above...
>
>+1.
collecting all the review commends from you 2, that would be with this
diff squashed:
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index ca45aea8c17a6..f3476fda7f4f6 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -22,15 +22,15 @@
* Their availability depend on the platform and some may not be visible if that
* reason is not available.
*
- * The ``status_reasons`` attribute can be used by sysadmin monitoring all
- * possible reasons for throttling and reporting them. It's preferred over
- * monitoring ``status`` and then reading the reason both for simplicity and to
- * avoid TOCTOU (time-of-check to time-of-use).
+ * The ``reasons`` attribute can be used by sysadmin to monitor all possible
+ * reasons for throttling and report them. It's preferred over monitoring
+ * ``status`` and then reading the reason from individual attributes since that
+ * is racy. If there's no throttling happening, "none" is returned.
*
* The following attributes are available on Crescent Island platform:
*
* - ``status``: Overall throttle status (0: no throttling, 1: throttling)
- * - ``status_reasons``: All reasons causing throttling separated by newline.
+ * - ``reasons``: All reasons causing throttling separated by space
* - ``reason_pl1``: package PL1
* - ``reason_pl2``: package PL2
* - ``reason_pl4``: package PL4
@@ -50,7 +50,7 @@
* Other platforms support the following reasons:
*
* - ``status``: Overall throttle status (0: no throttling, 1: throttling)
- * - ``status_reasons``: All reasons causing throttling separated by newline.
+ * - ``reasons``: All reasons causing throttling separated by space
* - ``reason_pl1``: package PL1
* - ``reason_pl2``: package PL2
* - ``reason_pl4``: package PL4, Iccmax etc.
@@ -120,8 +120,8 @@ static ssize_t reason_show(struct kobject *kobj,
static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe);
-static ssize_t status_reasons_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
+static ssize_t reasons_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
{
struct xe_gt *gt = throttle_to_gt(kobj);
struct xe_device *xe = gt_to_xe(gt);
@@ -141,9 +141,11 @@ static ssize_t status_reasons_show(struct kobject *kobj,
ret += sysfs_emit_at(buff, ret, "%s ", (*pother)->name);
}
- /* Drop extra space from last iteration above */
if (ret)
+ /* Drop extra space from last iteration above */
ret--;
+ else
+ ret += sysfs_emit_at(buff, ret, "none");
ret += sysfs_emit_at(buff, ret, "\n");
@@ -162,7 +164,7 @@ static ssize_t status_reasons_show(struct kobject *kobj,
.mask = _mask, \
}
-static THROTTLE_ATTR_RO_FUNC(status_reasons, 0, status_reasons_show);
+static THROTTLE_ATTR_RO_FUNC(reasons, 0, reasons_show);
static THROTTLE_ATTR_RO(status, U32_MAX);
static THROTTLE_ATTR_RO(reason_pl1, POWER_LIMIT_1_MASK);
static THROTTLE_ATTR_RO(reason_pl2, POWER_LIMIT_2_MASK);
@@ -174,7 +176,7 @@ static THROTTLE_ATTR_RO(reason_vr_thermalert, VR_THERMALERT_MASK);
static THROTTLE_ATTR_RO(reason_vr_tdc, VR_TDC_MASK);
static struct attribute *throttle_attrs[] = {
- &attr_status_reasons.attr.attr,
+ &attr_reasons.attr.attr,
&attr_status.attr.attr,
&attr_reason_pl1.attr.attr,
&attr_reason_pl2.attr.attr,
@@ -200,7 +202,7 @@ static THROTTLE_ATTR_RO(reason_psys_crit, PSYS_CRIT_MASK);
static struct attribute *cri_throttle_attrs[] = {
/* Common */
- &attr_status_reasons.attr.attr,
+ &attr_reasons.attr.attr,
&attr_status.attr.attr,
&attr_reason_pl1.attr.attr,
&attr_reason_pl2.attr.attr,
Lucas De Marchi
>
>PS: A good read[1] if anyone's interested.
>
>[1] https://lwn.net/Articles/378884/
>
>Raag
^ permalink raw reply related [flat|nested] 17+ messages in thread* Re: [PATCH v3 8/8] drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons
2025-10-30 18:43 ` Lucas De Marchi
@ 2025-10-30 19:54 ` Vivi, Rodrigo
2025-10-31 6:08 ` Raag Jadav
0 siblings, 1 reply; 17+ messages in thread
From: Vivi, Rodrigo @ 2025-10-30 19:54 UTC (permalink / raw)
To: Jadav, Raag, De Marchi, Lucas; +Cc: intel-xe@lists.freedesktop.org
On Thu, 2025-10-30 at 13:43 -0500, Lucas De Marchi wrote:
> On Thu, Oct 30, 2025 at 05:06:46PM +0100, Raag Jadav wrote:
> > On Thu, Oct 30, 2025 at 11:47:03AM -0400, Rodrigo Vivi wrote:
> > > On Thu, Oct 30, 2025 at 09:55:30AM -0500, Lucas De Marchi wrote:
> > > > On Thu, Oct 30, 2025 at 10:53:36AM +0100, Raag Jadav wrote:
> > > > > On Wed, Oct 29, 2025 at 04:45:10PM -0700, Lucas De Marchi
> > > > > wrote:
> > > > > > It's currently not possible to safely monitor if there's
> > > > > > throttling
> > > > > > happening and what are the reasons. The approach of reading
> > > > > > the status
> > > > > > and then reading the reasons is not reliable as by the time
> > > > > > sysadmin
> > > > > > reads the reason, the throttling could not be happening
> > > > > > anymore.
> > > > > >
> > > > > > Previous tentative to fix that[1] was breaking the ABI and
> > > > > > potentially
> > > > > > sysadmin's scripts. This takes a different approach of
> > > > > > adding and
> > > > > > documenting the additional attribute. It's still valuable,
> > > > > > though
> > > > > > redundant, to provide the simpler 0/1 interface.
> > > > > >
> > > > > > In order to avoid userspace knowledge on the bitmask
> > > > > > meaning and to be
> > > > > > able to maintain the kernel side in sync with possible
> > > > > > changes in
> > > > > > future, just walk the attribute group and check what are
> > > > > > the masks that
> > > > > > match the value read.
> > > > > >
> > > > > > [1]
> > > > > > https://lore.kernel.org/intel-xe/20241025092238.167042-1-raag.jadav@intel.com/
> > > > >
> > > > > ...
> > > > >
> > > > > > +static const struct attribute_group
> > > > > > *get_platform_throttle_group(struct xe_device *xe);
> > > > > > +
> > > > > > +static ssize_t status_reasons_show(struct kobject *kobj,
> >
> > Semantically there's much of a 'status' here, so this could simply
> > be
> > 'reasons' (and same for the attribute name).
> >
> > > > > > + struct kobj_attribute
> > > > > > *attr, char *buff)
> > > > > > +{
> > > > > > + struct xe_gt *gt = throttle_to_gt(kobj);
> > > > > > + struct xe_device *xe = gt_to_xe(gt);
> > > > > > + const struct attribute_group *group;
> > > > > > + struct attribute **pother;
> > > > > > + ssize_t ret = 0;
> > > > > > + u32 reasons;
> > > > > > +
> > > > > > + reasons = xe_gt_throttle_get_limit_reasons(gt);
> > > > > > + group = get_platform_throttle_group(xe);
> > > > > > +
> > > > > > + for (pother = group->attrs; *pother; pother++) {
> > > > > > + struct kobj_attribute *kattr =
> > > > > > container_of(*pother, struct kobj_attribute, attr);
> > > > > > + struct throttle_attribute *other_ta =
> > > > > > kobj_attribute_to_throttle(kattr);
> > > > > > +
> > > > > > + if (other_ta->mask != U32_MAX && reasons &
> > > > > > other_ta->mask)
> > > > > > + ret += sysfs_emit_at(buff, ret,
> > > > > > "%s ", (*pother)->name);
> > > > >
> > > > > Much better.
> > > > >
> > > > > > + }
> > > > > > +
> > > > > > + /* Drop extra space from last iteration above */
> > > > > > + if (ret)
> > > > > > + ret--;
> > > > > > +
> > > > > > + ret += sysfs_emit_at(buff, ret, "\n");
> > > > >
> > > > > I went through the documentation again and I couldn't find
> > > > > any rules
> > > > > related to empty files or whether it is allowed (just
> > > > > thinking out
> > > > > loud about no throttling cases).
> > > >
> > > > do you mean if "empty" files are allowed in sysfs? I don't
> > > > think there's
> > > > any problem with that. It's also not empty, it has a newline
> > > > there ;)
> > >
> > > alternatively we could print the entire reg in hex format?
> > > But I prefer the text line in this patch.
> > >
> > > Nothing against the 'empty' file with or without the new-line,
> > > but perhaps we could consider to track that in the loop
> > > and if none is add we print
> > >
> > > if (ret)
> > > ret--;
> > > else
> > > sysfs_emit_at(buff, ret, "none");
> > >
> > > and document that above...
> >
> > +1.
>
> collecting all the review commends from you 2, that would be with
> this
> diff squashed:
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c
> b/drivers/gpu/drm/xe/xe_gt_throttle.c
> index ca45aea8c17a6..f3476fda7f4f6 100644
> --- a/drivers/gpu/drm/xe/xe_gt_throttle.c
> +++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
> @@ -22,15 +22,15 @@
> * Their availability depend on the platform and some may not be
> visible if that
> * reason is not available.
> *
> - * The ``status_reasons`` attribute can be used by sysadmin
> monitoring all
> - * possible reasons for throttling and reporting them. It's
> preferred over
> - * monitoring ``status`` and then reading the reason both for
> simplicity and to
> - * avoid TOCTOU (time-of-check to time-of-use).
> + * The ``reasons`` attribute can be used by sysadmin to monitor all
> possible
> + * reasons for throttling and report them. It's preferred over
> monitoring
> + * ``status`` and then reading the reason from individual attributes
> since that
> + * is racy. If there's no throttling happening, "none" is returned.
> *
> * The following attributes are available on Crescent Island
> platform:
> *
> * - ``status``: Overall throttle status (0: no throttling, 1:
> throttling)
> - * - ``status_reasons``: All reasons causing throttling separated by
> newline.
> + * - ``reasons``: All reasons causing throttling separated by space
> * - ``reason_pl1``: package PL1
> * - ``reason_pl2``: package PL2
> * - ``reason_pl4``: package PL4
> @@ -50,7 +50,7 @@
> * Other platforms support the following reasons:
> *
> * - ``status``: Overall throttle status (0: no throttling, 1:
> throttling)
> - * - ``status_reasons``: All reasons causing throttling separated by
> newline.
> + * - ``reasons``: All reasons causing throttling separated by space
> * - ``reason_pl1``: package PL1
> * - ``reason_pl2``: package PL2
> * - ``reason_pl4``: package PL4, Iccmax etc.
> @@ -120,8 +120,8 @@ static ssize_t reason_show(struct kobject *kobj,
>
> static const struct attribute_group
> *get_platform_throttle_group(struct xe_device *xe);
>
> -static ssize_t status_reasons_show(struct kobject *kobj,
> - struct kobj_attribute *attr, char
> *buff)
> +static ssize_t reasons_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buff)
> {
> struct xe_gt *gt = throttle_to_gt(kobj);
> struct xe_device *xe = gt_to_xe(gt);
> @@ -141,9 +141,11 @@ static ssize_t status_reasons_show(struct
> kobject *kobj,
> ret += sysfs_emit_at(buff, ret, "%s ",
> (*pother)->name);
> }
>
> - /* Drop extra space from last iteration above */
> if (ret)
> + /* Drop extra space from last iteration above */
> ret--;
> + else
> + ret += sysfs_emit_at(buff, ret, "none");
>
> ret += sysfs_emit_at(buff, ret, "\n");
>
> @@ -162,7 +164,7 @@ static ssize_t status_reasons_show(struct kobject
> *kobj,
> .mask = _mask, \
> }
>
> -static THROTTLE_ATTR_RO_FUNC(status_reasons, 0,
> status_reasons_show);
> +static THROTTLE_ATTR_RO_FUNC(reasons, 0, reasons_show);
> static THROTTLE_ATTR_RO(status, U32_MAX);
> static THROTTLE_ATTR_RO(reason_pl1, POWER_LIMIT_1_MASK);
> static THROTTLE_ATTR_RO(reason_pl2, POWER_LIMIT_2_MASK);
> @@ -174,7 +176,7 @@ static THROTTLE_ATTR_RO(reason_vr_thermalert,
> VR_THERMALERT_MASK);
> static THROTTLE_ATTR_RO(reason_vr_tdc, VR_TDC_MASK);
>
> static struct attribute *throttle_attrs[] = {
> - &attr_status_reasons.attr.attr,
> + &attr_reasons.attr.attr,
> &attr_status.attr.attr,
> &attr_reason_pl1.attr.attr,
> &attr_reason_pl2.attr.attr,
> @@ -200,7 +202,7 @@ static THROTTLE_ATTR_RO(reason_psys_crit,
> PSYS_CRIT_MASK);
>
> static struct attribute *cri_throttle_attrs[] = {
> /* Common */
> - &attr_status_reasons.attr.attr,
> + &attr_reasons.attr.attr,
> &attr_status.attr.attr,
> &attr_reason_pl1.attr.attr,
> &attr_reason_pl2.attr.attr,
works for me. count on my rv-b.
unless Raag has any further concern...
>
> Lucas De Marchi
>
> >
> > PS: A good read[1] if anyone's interested.
> >
> > [1] https://lwn.net/Articles/378884/
> >
> > Raag
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH v3 8/8] drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons
2025-10-30 19:54 ` Vivi, Rodrigo
@ 2025-10-31 6:08 ` Raag Jadav
0 siblings, 0 replies; 17+ messages in thread
From: Raag Jadav @ 2025-10-31 6:08 UTC (permalink / raw)
To: Vivi, Rodrigo; +Cc: De Marchi, Lucas, intel-xe@lists.freedesktop.org
On Fri, Oct 31, 2025 at 01:24:08AM +0530, Vivi, Rodrigo wrote:
> On Thu, 2025-10-30 at 13:43 -0500, Lucas De Marchi wrote:
> > On Thu, Oct 30, 2025 at 05:06:46PM +0100, Raag Jadav wrote:
> > > On Thu, Oct 30, 2025 at 11:47:03AM -0400, Rodrigo Vivi wrote:
> > > > On Thu, Oct 30, 2025 at 09:55:30AM -0500, Lucas De Marchi wrote:
> > > > > On Thu, Oct 30, 2025 at 10:53:36AM +0100, Raag Jadav wrote:
> > > > > > On Wed, Oct 29, 2025 at 04:45:10PM -0700, Lucas De Marchi
> > > > > > wrote:
> > > > > > > It's currently not possible to safely monitor if there's
> > > > > > > throttling
> > > > > > > happening and what are the reasons. The approach of reading
> > > > > > > the status
> > > > > > > and then reading the reasons is not reliable as by the time
> > > > > > > sysadmin
> > > > > > > reads the reason, the throttling could not be happening
> > > > > > > anymore.
> > > > > > >
> > > > > > > Previous tentative to fix that[1] was breaking the ABI and
> > > > > > > potentially
> > > > > > > sysadmin's scripts. This takes a different approach of
> > > > > > > adding and
> > > > > > > documenting the additional attribute. It's still valuable,
> > > > > > > though
> > > > > > > redundant, to provide the simpler 0/1 interface.
> > > > > > >
> > > > > > > In order to avoid userspace knowledge on the bitmask
> > > > > > > meaning and to be
> > > > > > > able to maintain the kernel side in sync with possible
> > > > > > > changes in
> > > > > > > future, just walk the attribute group and check what are
> > > > > > > the masks that
> > > > > > > match the value read.
> > > > > > >
> > > > > > > [1]
> > > > > > > https://lore.kernel.org/intel-xe/20241025092238.167042-1-raag.jadav@intel.com/
> > > > > >
> > > > > > ...
> > > > > >
> > > > > > > +static const struct attribute_group
> > > > > > > *get_platform_throttle_group(struct xe_device *xe);
> > > > > > > +
> > > > > > > +static ssize_t status_reasons_show(struct kobject *kobj,
> > >
> > > Semantically there's much of a 'status' here, so this could simply
> > > be
> > > 'reasons' (and same for the attribute name).
> > >
> > > > > > > + struct kobj_attribute
> > > > > > > *attr, char *buff)
> > > > > > > +{
> > > > > > > + struct xe_gt *gt = throttle_to_gt(kobj);
> > > > > > > + struct xe_device *xe = gt_to_xe(gt);
> > > > > > > + const struct attribute_group *group;
> > > > > > > + struct attribute **pother;
> > > > > > > + ssize_t ret = 0;
> > > > > > > + u32 reasons;
> > > > > > > +
> > > > > > > + reasons = xe_gt_throttle_get_limit_reasons(gt);
> > > > > > > + group = get_platform_throttle_group(xe);
> > > > > > > +
> > > > > > > + for (pother = group->attrs; *pother; pother++) {
> > > > > > > + struct kobj_attribute *kattr =
> > > > > > > container_of(*pother, struct kobj_attribute, attr);
> > > > > > > + struct throttle_attribute *other_ta =
> > > > > > > kobj_attribute_to_throttle(kattr);
> > > > > > > +
> > > > > > > + if (other_ta->mask != U32_MAX && reasons &
> > > > > > > other_ta->mask)
> > > > > > > + ret += sysfs_emit_at(buff, ret,
> > > > > > > "%s ", (*pother)->name);
> > > > > >
> > > > > > Much better.
> > > > > >
> > > > > > > + }
> > > > > > > +
> > > > > > > + /* Drop extra space from last iteration above */
> > > > > > > + if (ret)
> > > > > > > + ret--;
> > > > > > > +
> > > > > > > + ret += sysfs_emit_at(buff, ret, "\n");
> > > > > >
> > > > > > I went through the documentation again and I couldn't find
> > > > > > any rules
> > > > > > related to empty files or whether it is allowed (just
> > > > > > thinking out
> > > > > > loud about no throttling cases).
> > > > >
> > > > > do you mean if "empty" files are allowed in sysfs? I don't
> > > > > think there's
> > > > > any problem with that. It's also not empty, it has a newline
> > > > > there ;)
> > > >
> > > > alternatively we could print the entire reg in hex format?
> > > > But I prefer the text line in this patch.
> > > >
> > > > Nothing against the 'empty' file with or without the new-line,
> > > > but perhaps we could consider to track that in the loop
> > > > and if none is add we print
> > > >
> > > > if (ret)
> > > > ret--;
> > > > else
> > > > sysfs_emit_at(buff, ret, "none");
> > > >
> > > > and document that above...
> > >
> > > +1.
> >
> > collecting all the review commends from you 2, that would be with
> > this
> > diff squashed:
> >
> > diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c
> > b/drivers/gpu/drm/xe/xe_gt_throttle.c
> > index ca45aea8c17a6..f3476fda7f4f6 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_throttle.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
> > @@ -22,15 +22,15 @@
> > * Their availability depend on the platform and some may not be
> > visible if that
> > * reason is not available.
> > *
> > - * The ``status_reasons`` attribute can be used by sysadmin
> > monitoring all
> > - * possible reasons for throttling and reporting them. It's
> > preferred over
> > - * monitoring ``status`` and then reading the reason both for
> > simplicity and to
> > - * avoid TOCTOU (time-of-check to time-of-use).
> > + * The ``reasons`` attribute can be used by sysadmin to monitor all
> > possible
> > + * reasons for throttling and report them. It's preferred over
> > monitoring
> > + * ``status`` and then reading the reason from individual attributes
> > since that
> > + * is racy. If there's no throttling happening, "none" is returned.
> > *
> > * The following attributes are available on Crescent Island
> > platform:
> > *
> > * - ``status``: Overall throttle status (0: no throttling, 1:
> > throttling)
> > - * - ``status_reasons``: All reasons causing throttling separated by
> > newline.
> > + * - ``reasons``: All reasons causing throttling separated by space
"Array of" ...
> > * - ``reason_pl1``: package PL1
> > * - ``reason_pl2``: package PL2
> > * - ``reason_pl4``: package PL4
> > @@ -50,7 +50,7 @@
> > * Other platforms support the following reasons:
> > *
> > * - ``status``: Overall throttle status (0: no throttling, 1:
> > throttling)
> > - * - ``status_reasons``: All reasons causing throttling separated by
> > newline.
> > + * - ``reasons``: All reasons causing throttling separated by space
Ditto.
> > * - ``reason_pl1``: package PL1
> > * - ``reason_pl2``: package PL2
> > * - ``reason_pl4``: package PL4, Iccmax etc.
> > @@ -120,8 +120,8 @@ static ssize_t reason_show(struct kobject *kobj,
> >
> > static const struct attribute_group
> > *get_platform_throttle_group(struct xe_device *xe);
> >
> > -static ssize_t status_reasons_show(struct kobject *kobj,
> > - struct kobj_attribute *attr, char
> > *buff)
> > +static ssize_t reasons_show(struct kobject *kobj,
> > + struct kobj_attribute *attr, char *buff)
> > {
> > struct xe_gt *gt = throttle_to_gt(kobj);
> > struct xe_device *xe = gt_to_xe(gt);
> > @@ -141,9 +141,11 @@ static ssize_t status_reasons_show(struct
> > kobject *kobj,
> > ret += sysfs_emit_at(buff, ret, "%s ",
> > (*pother)->name);
> > }
> >
> > - /* Drop extra space from last iteration above */
> > if (ret)
> > + /* Drop extra space from last iteration above */
> > ret--;
> > + else
> > + ret += sysfs_emit_at(buff, ret, "none");
> >
> > ret += sysfs_emit_at(buff, ret, "\n");
> >
> > @@ -162,7 +164,7 @@ static ssize_t status_reasons_show(struct kobject
> > *kobj,
> > .mask = _mask, \
> > }
> >
> > -static THROTTLE_ATTR_RO_FUNC(status_reasons, 0,
> > status_reasons_show);
> > +static THROTTLE_ATTR_RO_FUNC(reasons, 0, reasons_show);
> > static THROTTLE_ATTR_RO(status, U32_MAX);
> > static THROTTLE_ATTR_RO(reason_pl1, POWER_LIMIT_1_MASK);
> > static THROTTLE_ATTR_RO(reason_pl2, POWER_LIMIT_2_MASK);
> > @@ -174,7 +176,7 @@ static THROTTLE_ATTR_RO(reason_vr_thermalert,
> > VR_THERMALERT_MASK);
> > static THROTTLE_ATTR_RO(reason_vr_tdc, VR_TDC_MASK);
> >
> > static struct attribute *throttle_attrs[] = {
> > - &attr_status_reasons.attr.attr,
> > + &attr_reasons.attr.attr,
> > &attr_status.attr.attr,
> > &attr_reason_pl1.attr.attr,
> > &attr_reason_pl2.attr.attr,
> > @@ -200,7 +202,7 @@ static THROTTLE_ATTR_RO(reason_psys_crit,
> > PSYS_CRIT_MASK);
> >
> > static struct attribute *cri_throttle_attrs[] = {
> > /* Common */
> > - &attr_status_reasons.attr.attr,
> > + &attr_reasons.attr.attr,
> > &attr_status.attr.attr,
> > &attr_reason_pl1.attr.attr,
> > &attr_reason_pl2.attr.attr,
>
> works for me. count on my rv-b.
>
> unless Raag has any further concern...
Works for me as well.
Raag
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v3 8/8] drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons
2025-10-29 23:45 ` [PATCH v3 8/8] drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons Lucas De Marchi
2025-10-30 9:53 ` Raag Jadav
@ 2025-10-30 15:42 ` Rodrigo Vivi
1 sibling, 0 replies; 17+ messages in thread
From: Rodrigo Vivi @ 2025-10-30 15:42 UTC (permalink / raw)
To: Lucas De Marchi; +Cc: intel-xe, Raag Jadav
On Wed, Oct 29, 2025 at 04:45:10PM -0700, Lucas De Marchi wrote:
> It's currently not possible to safely monitor if there's throttling
> happening and what are the reasons. The approach of reading the status
> and then reading the reasons is not reliable as by the time sysadmin
> reads the reason, the throttling could not be happening anymore.
>
> Previous tentative to fix that[1] was breaking the ABI and potentially
> sysadmin's scripts. This takes a different approach of adding and
> documenting the additional attribute. It's still valuable, though
> redundant, to provide the simpler 0/1 interface.
>
> In order to avoid userspace knowledge on the bitmask meaning and to be
> able to maintain the kernel side in sync with possible changes in
> future, just walk the attribute group and check what are the masks that
> match the value read.
>
> [1] https://lore.kernel.org/intel-xe/20241025092238.167042-1-raag.jadav@intel.com/
>
> Cc: Raag Jadav <raag.jadav@intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> ---
> - v2: Use space as separator (Rodrigo)
> ---
> drivers/gpu/drm/xe/xe_gt_throttle.c | 52 +++++++++++++++++++++++++++++++++++--
> 1 file changed, 50 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
> index fa7068aac3344..ca45aea8c17a6 100644
> --- a/drivers/gpu/drm/xe/xe_gt_throttle.c
> +++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
> @@ -22,9 +22,15 @@
> * Their availability depend on the platform and some may not be visible if that
> * reason is not available.
> *
> + * The ``status_reasons`` attribute can be used by sysadmin monitoring all
> + * possible reasons for throttling and reporting them. It's preferred over
> + * monitoring ``status`` and then reading the reason both for simplicity and to
> + * avoid TOCTOU (time-of-check to time-of-use).
> + *
> * The following attributes are available on Crescent Island platform:
> *
> - * - ``status``: Overall throttle status
> + * - ``status``: Overall throttle status (0: no throttling, 1: throttling)
> + * - ``status_reasons``: All reasons causing throttling separated by newline.
s/newline/space
> * - ``reason_pl1``: package PL1
> * - ``reason_pl2``: package PL2
> * - ``reason_pl4``: package PL4
> @@ -43,7 +49,8 @@
> *
> * Other platforms support the following reasons:
> *
> - * - ``status``: Overall status
> + * - ``status``: Overall throttle status (0: no throttling, 1: throttling)
> + * - ``status_reasons``: All reasons causing throttling separated by newline.
same
> * - ``reason_pl1``: package PL1
> * - ``reason_pl2``: package PL2
> * - ``reason_pl4``: package PL4, Iccmax etc.
> @@ -111,12 +118,51 @@ static ssize_t reason_show(struct kobject *kobj,
> return sysfs_emit(buff, "%u\n", is_throttled_by(gt, ta->mask));
> }
>
> +static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe);
> +
> +static ssize_t status_reasons_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buff)
> +{
> + struct xe_gt *gt = throttle_to_gt(kobj);
> + struct xe_device *xe = gt_to_xe(gt);
> + const struct attribute_group *group;
> + struct attribute **pother;
> + ssize_t ret = 0;
> + u32 reasons;
> +
> + reasons = xe_gt_throttle_get_limit_reasons(gt);
> + group = get_platform_throttle_group(xe);
> +
> + for (pother = group->attrs; *pother; pother++) {
> + struct kobj_attribute *kattr = container_of(*pother, struct kobj_attribute, attr);
> + struct throttle_attribute *other_ta = kobj_attribute_to_throttle(kattr);
> +
> + if (other_ta->mask != U32_MAX && reasons & other_ta->mask)
> + ret += sysfs_emit_at(buff, ret, "%s ", (*pother)->name);
> + }
> +
> + /* Drop extra space from last iteration above */
> + if (ret)
> + ret--;
> +
> + ret += sysfs_emit_at(buff, ret, "\n");
> +
> + return ret;
> +}
> +
> #define THROTTLE_ATTR_RO(name, _mask) \
> struct throttle_attribute attr_##name = { \
> .attr = __ATTR(name, 0444, reason_show, NULL), \
> .mask = _mask, \
> }
>
> +#define THROTTLE_ATTR_RO_FUNC(name, _mask, _show) \
> + struct throttle_attribute attr_##name = { \
> + .attr = __ATTR(name, 0444, _show, NULL), \
> + .mask = _mask, \
> + }
> +
> +static THROTTLE_ATTR_RO_FUNC(status_reasons, 0, status_reasons_show);
> static THROTTLE_ATTR_RO(status, U32_MAX);
> static THROTTLE_ATTR_RO(reason_pl1, POWER_LIMIT_1_MASK);
> static THROTTLE_ATTR_RO(reason_pl2, POWER_LIMIT_2_MASK);
> @@ -128,6 +174,7 @@ static THROTTLE_ATTR_RO(reason_vr_thermalert, VR_THERMALERT_MASK);
> static THROTTLE_ATTR_RO(reason_vr_tdc, VR_TDC_MASK);
>
> static struct attribute *throttle_attrs[] = {
> + &attr_status_reasons.attr.attr,
> &attr_status.attr.attr,
> &attr_reason_pl1.attr.attr,
> &attr_reason_pl2.attr.attr,
> @@ -153,6 +200,7 @@ static THROTTLE_ATTR_RO(reason_psys_crit, PSYS_CRIT_MASK);
>
> static struct attribute *cri_throttle_attrs[] = {
> /* Common */
> + &attr_status_reasons.attr.attr,
> &attr_status.attr.attr,
> &attr_reason_pl1.attr.attr,
> &attr_reason_pl2.attr.attr,
>
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 17+ messages in thread