* [PATCH v2 3/8] mfd: khadas-mcu: Add per-variant configuration infrastructure and VIM4 support
From: Ronald Claveau @ 2026-04-03 16:08 UTC (permalink / raw)
To: Neil Armstrong, Lee Jones, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Andi Shyti, Kevin Hilman, Jerome Brunet,
Martin Blumenstingl, Beniamino Galvani, Rafael J. Wysocki,
Daniel Lezcano, Zhang Rui, Lukasz Luba, Liam Girdwood, Mark Brown
Cc: linux-amlogic, devicetree, linux-kernel, linux-i2c,
linux-arm-kernel, linux-pm, Ronald Claveau
In-Reply-To: <20260403-add-mcu-fan-khadas-vim4-v2-0-70536b22439a@aliel.fr>
Introduce a per-variant configuration structure (khadas_mcu_data)
holding the regmap config and MFD cells,
selected at probe time via the of_device_id match data.
This makes adding other variants straightforward.
Also introduce khadas_mcu_fan_pdata to pass fan register address and
maximum level to the fan sub-driver, removing the hardcoded constants.
Signed-off-by: Ronald Claveau <linux-kernel-dev@aliel.fr>
---
include/linux/mfd/khadas-mcu.h | 39 +++++++++++++++++++++++++++++++++++++--
1 file changed, 37 insertions(+), 2 deletions(-)
diff --git a/include/linux/mfd/khadas-mcu.h b/include/linux/mfd/khadas-mcu.h
index a99ba2ed0e4e0..75e275d3fa8d9 100644
--- a/include/linux/mfd/khadas-mcu.h
+++ b/include/linux/mfd/khadas-mcu.h
@@ -70,6 +70,13 @@
#define KHADAS_MCU_WOL_INIT_START_REG 0x87 /* WO */
#define KHADAS_MCU_CMD_FAN_STATUS_CTRL_REG 0x88 /* WO */
+/* VIM4 specific registers */
+#define KHADAS_MCU_VIM4_REST_CONF_REG 0x2c /* WO - reset EEPROM */
+#define KHADAS_MCU_VIM4_LED_ON_RAM_REG 0x89 /* WO - LED volatile */
+#define KHADAS_MCU_VIM4_FAN_CTRL_REG 0x8a /* WO */
+#define KHADAS_MCU_VIM4_WDT_EN_REG 0x8b /* WO */
+#define KHADAS_MCU_VIM4_SYS_RST_REG 0x91 /* WO */
+
enum {
KHADAS_BOARD_VIM1 = 0x1,
KHADAS_BOARD_VIM2,
@@ -82,10 +89,38 @@ enum {
* struct khadas_mcu - Khadas MCU structure
* @device: device reference used for logs
* @regmap: register map
+ * @data: pointer to variant-specific config
*/
struct khadas_mcu {
- struct device *dev;
- struct regmap *regmap;
+ struct device *dev;
+ struct regmap *regmap;
+ const struct khadas_mcu_data *data;
+};
+
+/**
+ * struct khadas_mcu_data - per-variant configuration
+ * @regmap_config: regmap configuration
+ * @cells: MFD sub-devices
+ * @ncells: number of sub-devices
+ * @fan_cells: MFD fan sub-devices
+ * @nfan_cells: number of fan sub-devices
+ */
+struct khadas_mcu_data {
+ const struct regmap_config *regmap_config;
+ const struct mfd_cell *cells;
+ int ncells;
+ const struct mfd_cell *fan_cells;
+ int nfan_cells;
+};
+
+/**
+ * struct khadas_mcu_fan_pdata - fan sub-driver configuration
+ * @fan_reg: register address to write the fan level
+ * @max_level: maximum fan level
+ */
+struct khadas_mcu_fan_pdata {
+ unsigned int fan_reg;
+ unsigned int max_level;
};
#endif /* MFD_KHADAS_MCU_H */
--
2.49.0
^ permalink raw reply related
* [PATCH v2 4/8] mfd: khadas-mcu: Add support for VIM4 MCU variant
From: Ronald Claveau @ 2026-04-03 16:08 UTC (permalink / raw)
To: Neil Armstrong, Lee Jones, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Andi Shyti, Kevin Hilman, Jerome Brunet,
Martin Blumenstingl, Beniamino Galvani, Rafael J. Wysocki,
Daniel Lezcano, Zhang Rui, Lukasz Luba, Liam Girdwood, Mark Brown
Cc: linux-amlogic, devicetree, linux-kernel, linux-i2c,
linux-arm-kernel, linux-pm, Ronald Claveau
In-Reply-To: <20260403-add-mcu-fan-khadas-vim4-v2-0-70536b22439a@aliel.fr>
Refactor probe() to use per-variant khadas_mcu_data
instead of hardcoded globals.
Add dedicated regmap configuration and device data for the VIM4 MCU,
with its own volatile/writeable registers.
Add the fan control register
(0–100 levels vs 0–3 for previously supported boards).
Add a new compatible string "khadas,vim4-mcu".
Signed-off-by: Ronald Claveau <linux-kernel-dev@aliel.fr>
---
drivers/mfd/khadas-mcu.c | 106 ++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 95 insertions(+), 11 deletions(-)
diff --git a/drivers/mfd/khadas-mcu.c b/drivers/mfd/khadas-mcu.c
index ba981a7886921..b36b3b3ab73c0 100644
--- a/drivers/mfd/khadas-mcu.c
+++ b/drivers/mfd/khadas-mcu.c
@@ -75,15 +75,91 @@ static const struct regmap_config khadas_mcu_regmap_config = {
.cache_type = REGCACHE_MAPLE,
};
+static const struct khadas_mcu_fan_pdata khadas_mcu_fan_pdata = {
+ .fan_reg = KHADAS_MCU_CMD_FAN_STATUS_CTRL_REG,
+ .max_level = 3,
+};
+
static struct mfd_cell khadas_mcu_fan_cells[] = {
/* VIM1/2 Rev13+ and VIM3 only */
- { .name = "khadas-mcu-fan-ctrl", },
+ {
+ .name = "khadas-mcu-fan-ctrl",
+ .platform_data = &khadas_mcu_fan_pdata,
+ .pdata_size = sizeof(khadas_mcu_fan_pdata),
+ },
};
static struct mfd_cell khadas_mcu_cells[] = {
{ .name = "khadas-mcu-user-mem", },
};
+static const struct khadas_mcu_data khadas_mcu_data = {
+ .regmap_config = &khadas_mcu_regmap_config,
+ .cells = khadas_mcu_cells,
+ .ncells = ARRAY_SIZE(khadas_mcu_cells),
+ .fan_cells = khadas_mcu_fan_cells,
+ .nfan_cells = ARRAY_SIZE(khadas_mcu_fan_cells),
+};
+
+static bool khadas_mcu_vim4_reg_volatile(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case KHADAS_MCU_PWR_OFF_CMD_REG:
+ case KHADAS_MCU_VIM4_REST_CONF_REG:
+ case KHADAS_MCU_WOL_INIT_START_REG:
+ case KHADAS_MCU_VIM4_LED_ON_RAM_REG:
+ case KHADAS_MCU_VIM4_FAN_CTRL_REG:
+ case KHADAS_MCU_VIM4_WDT_EN_REG:
+ case KHADAS_MCU_VIM4_SYS_RST_REG:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool khadas_mcu_vim4_reg_writeable(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case KHADAS_MCU_VERSION_0_REG:
+ case KHADAS_MCU_VERSION_1_REG:
+ case KHADAS_MCU_SHUTDOWN_NORMAL_STATUS_REG:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static const struct regmap_config khadas_mcu_vim4_regmap_config = {
+ .reg_bits = 8,
+ .reg_stride = 1,
+ .val_bits = 8,
+ .max_register = KHADAS_MCU_VIM4_SYS_RST_REG,
+ .volatile_reg = khadas_mcu_vim4_reg_volatile,
+ .writeable_reg = khadas_mcu_vim4_reg_writeable,
+ .cache_type = REGCACHE_MAPLE,
+};
+
+static const struct khadas_mcu_fan_pdata khadas_vim4_fan_pdata = {
+ .fan_reg = KHADAS_MCU_VIM4_FAN_CTRL_REG,
+ .max_level = 0x64,
+};
+
+static const struct mfd_cell khadas_mcu_vim4_cells[] = {
+ {
+ .name = "khadas-mcu-fan-ctrl",
+ .platform_data = &khadas_vim4_fan_pdata,
+ .pdata_size = sizeof(khadas_vim4_fan_pdata),
+ },
+};
+
+static const struct khadas_mcu_data khadas_vim4_mcu_data = {
+ .regmap_config = &khadas_mcu_vim4_regmap_config,
+ .cells = NULL,
+ .ncells = 0,
+ .fan_cells = khadas_mcu_vim4_cells,
+ .nfan_cells = ARRAY_SIZE(khadas_mcu_vim4_cells),
+};
+
static int khadas_mcu_probe(struct i2c_client *client)
{
struct device *dev = &client->dev;
@@ -94,28 +170,35 @@ static int khadas_mcu_probe(struct i2c_client *client)
if (!ddata)
return -ENOMEM;
+ ddata->data = i2c_get_match_data(client);
+ if (!ddata->data)
+ return -EINVAL;
+
i2c_set_clientdata(client, ddata);
ddata->dev = dev;
- ddata->regmap = devm_regmap_init_i2c(client, &khadas_mcu_regmap_config);
+ ddata->regmap = devm_regmap_init_i2c(client,
+ ddata->data->regmap_config);
if (IS_ERR(ddata->regmap)) {
ret = PTR_ERR(ddata->regmap);
dev_err(dev, "Failed to allocate register map: %d\n", ret);
return ret;
}
- ret = devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE,
- khadas_mcu_cells,
- ARRAY_SIZE(khadas_mcu_cells),
- NULL, 0, NULL);
- if (ret)
- return ret;
+ if (ddata->data->cells && ddata->data->ncells) {
+ ret = devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE,
+ ddata->data->cells,
+ ddata->data->ncells,
+ NULL, 0, NULL);
+ if (ret)
+ return ret;
+ }
if (of_property_present(dev->of_node, "#cooling-cells"))
return devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE,
- khadas_mcu_fan_cells,
- ARRAY_SIZE(khadas_mcu_fan_cells),
+ ddata->data->fan_cells,
+ ddata->data->nfan_cells,
NULL, 0, NULL);
return 0;
@@ -123,7 +206,8 @@ static int khadas_mcu_probe(struct i2c_client *client)
#ifdef CONFIG_OF
static const struct of_device_id khadas_mcu_of_match[] = {
- { .compatible = "khadas,mcu", },
+ { .compatible = "khadas,mcu", .data = &khadas_mcu_data },
+ { .compatible = "khadas,vim4-mcu", .data = &khadas_vim4_mcu_data },
{},
};
MODULE_DEVICE_TABLE(of, khadas_mcu_of_match);
--
2.49.0
^ permalink raw reply related
* [PATCH v2 6/8] arm64: dts: amlogic: t7: Add i2c pinctrl node
From: Ronald Claveau @ 2026-04-03 16:08 UTC (permalink / raw)
To: Neil Armstrong, Lee Jones, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Andi Shyti, Kevin Hilman, Jerome Brunet,
Martin Blumenstingl, Beniamino Galvani, Rafael J. Wysocki,
Daniel Lezcano, Zhang Rui, Lukasz Luba, Liam Girdwood, Mark Brown
Cc: linux-amlogic, devicetree, linux-kernel, linux-i2c,
linux-arm-kernel, linux-pm, Ronald Claveau
In-Reply-To: <20260403-add-mcu-fan-khadas-vim4-v2-0-70536b22439a@aliel.fr>
Add the T7 pinctrl used by the Khadas VIM4 for MCU communication.
Signed-off-by: Ronald Claveau <linux-kernel-dev@aliel.fr>
---
arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi b/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi
index 7fe72c94ed623..e96fe10b251a0 100644
--- a/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi
+++ b/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi
@@ -376,6 +376,16 @@ mux {
};
};
+ i2c0_ao_d_pins: i2c0-ao-d {
+ mux {
+ groups = "i2c0_ao_sck_d",
+ "i2c0_ao_sda_d";
+ function = "i2c0_ao";
+ bias-disable;
+ drive-strength-microamp = <3000>;
+ };
+ };
+
pwm_a_pins: pwm-a {
mux {
groups = "pwm_a";
--
2.49.0
^ permalink raw reply related
* [PATCH v2 7/8] arm64: dts: amlogic: t7: Add i2c controller node
From: Ronald Claveau @ 2026-04-03 16:08 UTC (permalink / raw)
To: Neil Armstrong, Lee Jones, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Andi Shyti, Kevin Hilman, Jerome Brunet,
Martin Blumenstingl, Beniamino Galvani, Rafael J. Wysocki,
Daniel Lezcano, Zhang Rui, Lukasz Luba, Liam Girdwood, Mark Brown
Cc: linux-amlogic, devicetree, linux-kernel, linux-i2c,
linux-arm-kernel, linux-pm, Ronald Claveau
In-Reply-To: <20260403-add-mcu-fan-khadas-vim4-v2-0-70536b22439a@aliel.fr>
Add the T7 i2c controller node used by the Khadas VIM4
for MCU communication.
Use amlogic,meson-axg-i2c as fallback compatible.
Signed-off-by: Ronald Claveau <linux-kernel-dev@aliel.fr>
---
arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi b/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi
index e96fe10b251a0..560c9dce35266 100644
--- a/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi
+++ b/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi
@@ -711,6 +711,16 @@ pwm_ao_cd: pwm@60000 {
status = "disabled";
};
+ i2c_m_ao_a: i2c@76000 {
+ compatible = "amlogic,t7-i2c", "amlogic,meson-axg-i2c";
+ reg = <0x0 0x76000 0x0 0x48>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupts = <GIC_SPI 330 IRQ_TYPE_EDGE_RISING>;
+ clocks = <&clkc_periphs CLKID_SYS_I2C_AO_A>;
+ status = "disabled";
+ };
+
sd_emmc_a: mmc@88000 {
compatible = "amlogic,t7-mmc", "amlogic,meson-axg-mmc";
reg = <0x0 0x88000 0x0 0x800>;
--
2.49.0
^ permalink raw reply related
* [PATCH v2 5/8] thermal: khadas-mcu-fan: Add fan config from platform data Add regulator support
From: Ronald Claveau @ 2026-04-03 16:08 UTC (permalink / raw)
To: Neil Armstrong, Lee Jones, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Andi Shyti, Kevin Hilman, Jerome Brunet,
Martin Blumenstingl, Beniamino Galvani, Rafael J. Wysocki,
Daniel Lezcano, Zhang Rui, Lukasz Luba, Liam Girdwood, Mark Brown
Cc: linux-amlogic, devicetree, linux-kernel, linux-i2c,
linux-arm-kernel, linux-pm, Ronald Claveau
In-Reply-To: <20260403-add-mcu-fan-khadas-vim4-v2-0-70536b22439a@aliel.fr>
Replace the hardcoded MAX_LEVEL constant and fan register
with values read from platform_data (fan_reg, max_level),
as new MCUs need different values.
Optionally acquire and enable a "fan" regulator supply
at probe time and on resume,
so boards that gate fan power through a regulator are handled.
Signed-off-by: Ronald Claveau <linux-kernel-dev@aliel.fr>
---
drivers/thermal/khadas_mcu_fan.c | 49 +++++++++++++++++++++++++++++++++++-----
1 file changed, 43 insertions(+), 6 deletions(-)
diff --git a/drivers/thermal/khadas_mcu_fan.c b/drivers/thermal/khadas_mcu_fan.c
index d35e5313bea41..24559bf65de46 100644
--- a/drivers/thermal/khadas_mcu_fan.c
+++ b/drivers/thermal/khadas_mcu_fan.c
@@ -13,13 +13,15 @@
#include <linux/regmap.h>
#include <linux/sysfs.h>
#include <linux/thermal.h>
-
-#define MAX_LEVEL 3
+#include <linux/regulator/consumer.h>
struct khadas_mcu_fan_ctx {
struct khadas_mcu *mcu;
+ unsigned int fan_reg;
unsigned int level;
+ unsigned int max_level;
struct thermal_cooling_device *cdev;
+ struct regulator *power;
};
static int khadas_mcu_fan_set_level(struct khadas_mcu_fan_ctx *ctx,
@@ -27,8 +29,7 @@ static int khadas_mcu_fan_set_level(struct khadas_mcu_fan_ctx *ctx,
{
int ret;
- ret = regmap_write(ctx->mcu->regmap, KHADAS_MCU_CMD_FAN_STATUS_CTRL_REG,
- level);
+ ret = regmap_write(ctx->mcu->regmap, ctx->fan_reg, level);
if (ret)
return ret;
@@ -40,7 +41,9 @@ static int khadas_mcu_fan_set_level(struct khadas_mcu_fan_ctx *ctx,
static int khadas_mcu_fan_get_max_state(struct thermal_cooling_device *cdev,
unsigned long *state)
{
- *state = MAX_LEVEL;
+ struct khadas_mcu_fan_ctx *ctx = cdev->devdata;
+
+ *state = ctx->max_level;
return 0;
}
@@ -61,7 +64,7 @@ khadas_mcu_fan_set_cur_state(struct thermal_cooling_device *cdev,
{
struct khadas_mcu_fan_ctx *ctx = cdev->devdata;
- if (state > MAX_LEVEL)
+ if (state > ctx->max_level)
return -EINVAL;
if (state == ctx->level)
@@ -83,11 +86,32 @@ static int khadas_mcu_fan_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct khadas_mcu_fan_ctx *ctx;
int ret;
+ const struct khadas_mcu_fan_pdata *pdata = dev_get_platdata(&pdev->dev);
ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
+
ctx->mcu = mcu;
+ ctx->fan_reg = pdata->fan_reg;
+ ctx->max_level = pdata->max_level;
+
+ ctx->power = devm_regulator_get_optional(dev->parent, "fan");
+ if (IS_ERR(ctx->power)) {
+ if (PTR_ERR(ctx->power) == -ENODEV)
+ ctx->power = NULL;
+ else
+ return PTR_ERR(ctx->power);
+ }
+
+ if (ctx->power) {
+ ret = regulator_enable(ctx->power);
+ if (ret) {
+ dev_err(dev, "Failed to enable fan power supply: %d\n", ret);
+ return ret;
+ }
+ }
+
platform_set_drvdata(pdev, ctx);
cdev = devm_thermal_of_cooling_device_register(dev->parent,
@@ -124,12 +148,25 @@ static int khadas_mcu_fan_suspend(struct device *dev)
ctx->level = level_save;
+ if (ctx->power) {
+ ret = regulator_disable(ctx->power);
+ if (ret)
+ return ret;
+ }
+
return 0;
}
static int khadas_mcu_fan_resume(struct device *dev)
{
struct khadas_mcu_fan_ctx *ctx = dev_get_drvdata(dev);
+ int ret;
+
+ if (ctx->power) {
+ ret = regulator_enable(ctx->power);
+ if (ret)
+ return ret;
+ }
return khadas_mcu_fan_set_level(ctx, ctx->level);
}
--
2.49.0
^ permalink raw reply related
* [PATCH v2 8/8] arm64: dts: amlogic: t7: khadas-vim4: Add i2c MCU fan node
From: Ronald Claveau @ 2026-04-03 16:08 UTC (permalink / raw)
To: Neil Armstrong, Lee Jones, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Andi Shyti, Kevin Hilman, Jerome Brunet,
Martin Blumenstingl, Beniamino Galvani, Rafael J. Wysocki,
Daniel Lezcano, Zhang Rui, Lukasz Luba, Liam Girdwood, Mark Brown
Cc: linux-amlogic, devicetree, linux-kernel, linux-i2c,
linux-arm-kernel, linux-pm, Ronald Claveau
In-Reply-To: <20260403-add-mcu-fan-khadas-vim4-v2-0-70536b22439a@aliel.fr>
Enable and configure i2c MCU node to get fan working on Khadas VIM4.
Signed-off-by: Ronald Claveau <linux-kernel-dev@aliel.fr>
---
.../boot/dts/amlogic/amlogic-t7-a311d2-khadas-vim4.dts | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/arch/arm64/boot/dts/amlogic/amlogic-t7-a311d2-khadas-vim4.dts b/arch/arm64/boot/dts/amlogic/amlogic-t7-a311d2-khadas-vim4.dts
index 69d6118ba57e7..5d7f5390f3a66 100644
--- a/arch/arm64/boot/dts/amlogic/amlogic-t7-a311d2-khadas-vim4.dts
+++ b/arch/arm64/boot/dts/amlogic/amlogic-t7-a311d2-khadas-vim4.dts
@@ -157,6 +157,19 @@ wifi32k: wifi32k {
};
};
+&i2c_m_ao_a {
+ status = "okay";
+ pinctrl-0 = <&i2c0_ao_d_pins>;
+ pinctrl-names = "default";
+
+ khadas_mcu: system-controller@18 {
+ compatible = "khadas,vim4-mcu";
+ reg = <0x18>;
+ fan-supply = <&vcc5v>;
+ #cooling-cells = <2>;
+ };
+};
+
&pwm_ab {
status = "okay";
pinctrl-0 = <&pwm_a_pins>;
--
2.49.0
^ permalink raw reply related
* [PATCH] PM / wakeup: Register class wakeup in pure_initcall phase
From: Heiner Kallweit @ 2026-04-03 16:09 UTC (permalink / raw)
To: Rafael J. Wysocki, Pavel Machek, Len Brown, Greg Kroah-Hartman,
Danilo Krummrich
Cc: Linux PM, driver-core
Wakeup sources (e.g. autosleep) can be created as early as core_initcall.
Class wakeup is registered in postcore_initcall only, which results in
autosleep wakeup source not being shown in sysfs. To fix this,
register class wakeup in pure_initcall phase already.
The current behavior doesn't cause any known issue, therefore treat
the change as an improvement.
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
drivers/base/power/wakeup_stats.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/base/power/wakeup_stats.c b/drivers/base/power/wakeup_stats.c
index 308f8bde9..30f0d3d61 100644
--- a/drivers/base/power/wakeup_stats.c
+++ b/drivers/base/power/wakeup_stats.c
@@ -216,4 +216,4 @@ static int __init wakeup_sources_sysfs_init(void)
return PTR_ERR_OR_ZERO(wakeup_class);
}
-postcore_initcall(wakeup_sources_sysfs_init);
+pure_initcall(wakeup_sources_sysfs_init);
--
2.53.0
^ permalink raw reply related
* Re: [PATCH v10 00/12] barrier: Add smp_cond_load_{relaxed, acquire}_timeout()
From: Okanovic, Haris @ 2026-04-03 16:12 UTC (permalink / raw)
To: ankur.a.arora@oracle.com
Cc: joao.m.martins@oracle.com, xueshuai@linux.alibaba.com,
david.laight.linux@gmail.com, boris.ostrovsky@oracle.com,
memxor@gmail.com, zhenglifeng1@huawei.com, konrad.wilk@oracle.com,
cl@gentwo.org, akpm@linux-foundation.org,
linux-kernel@vger.kernel.org, catalin.marinas@arm.com,
ast@kernel.org, rdunlap@infradead.org, daniel.lezcano@linaro.org,
arnd@arndb.de, linux-arch@vger.kernel.org, will@kernel.org,
mark.rutland@arm.com, peterz@infradead.org, bpf@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, Okanovic, Haris,
rafael@kernel.org, linux-pm@vger.kernel.org
In-Reply-To: <20260316013651.3225328-1-ankur.a.arora@oracle.com>
Can we merge this series? I think there's an approval from every named
maintainer at this point.
Besides the `perf sched` microbenchmark that Ankur has been running, I've
observed 4-6% improvements in memcached, cassandra, mysql, and
postgresql under certain loads. Other applications likely benefit too.
Thanks,
Haris Okanovic
AWS Graviton Software
On Sun, 2026-03-15 at 18:36 -0700, Ankur Arora wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>
>
>
> Hi,
>
> This series adds waited variants of the smp_cond_load() primitives:
> smp_cond_load_relaxed_timeout(), and smp_cond_load_acquire_timeout().
>
> With this version, the main remaining things are:
>
> - Review by PeterZ of the new interface tif_need_resched_relaxed_wait()
> (patch 11, "sched: add need-resched timed wait interface").
>
> - Review of the BPF changes. This version simplifies the rqspinlock
> changes by reusing the original error handling path
> (patches 9, 10 "bpf/rqspinlock: switch check_timeout() to a clock
> interface", "bpf/rqspinlock: Use smp_cond_load_acquire_timeout()").
>
> - Review of WFET handling. (patch 4, "arm64: support WFET in
> smp_cond_load_relaxed_timeout()").
>
> The new interfaces are meant for contexts where you want to wait on a
> condition variable for a finite duration. This is easy enough to do with
> a loop around cpu_relax(). There are, however, architectures (ex. arm64)
> that allow waiting on a cacheline instead.
>
> So, these interfaces handle a mixture of spin/wait with a
> smp_cond_load() thrown in. The interfaces are:
>
> smp_cond_load_relaxed_timeout(ptr, cond_expr, time_expr, timeout)
> smp_cond_load_acquire_timeout(ptr, cond_expr, time_expr, timeout)
>
> The parameters, time_expr, timeout determine when to bail out.
>
> Also add tif_need_resched_relaxed_wait() which wraps the pattern used
> in poll_idle() and abstracts out details of the interface and those
> of the scheduler.
>
> In addition add atomic_cond_read_*_timeout(), atomic64_cond_read_*_timeout(),
> and atomic_long wrappers to the interfaces.
>
> Finally update poll_idle() and resilient queued spinlocks to use them.
>
> Changelog:
> v9 [9]:
> - s/@cond/@cond_expr/ (Randy Dunlap)
> - Clarify that SMP_TIMEOUT_POLL_COUNT is only around memory
> addresses. (David Laight)
> - Add the missing config ARCH_HAS_CPU_RELAX in arch/arm64/Kconfig.
> (Catalin Marinas).
> - Switch to arch_counter_get_cntvct_stable() (via __delay_cycles())
> in the cmpwait path instead of using arch_timer_read_counter().
> (Catalin Marinas)
>
> v8 [0]:
> - Defer evaluation of @time_expr_ns to when we hit the slowpath.
> (comment from Alexei Starovoitov).
>
> - Mention that cpu_poll_relax() is better than raw CPU polling
> only where ARCH_HAS_CPU_RELAX is defined.
> - also define ARCH_HAS_CPU_RELAX for arm64.
> (Came out of a discussion with Will Deacon.)
>
> - Split out WFET and WFE handling. I was doing both of these
> in a common handler.
> (From Will Deacon and in an earlier revision by Catalin Marinas.)
>
> - Add mentions of atomic_cond_read_{relaxed,acquire}(),
> atomic_cond_read_{relaxed,acquire}_timeout() in
> Documentation/atomic_t.txt.
>
> - Use the BIT() macro to do the checking in tif_bitset_relaxed_wait().
>
> - Cleanup unnecessary assignments, casts etc in poll_idle().
> (From Rafael Wysocki.)
>
> - Fixup warnings from kernel build robot
>
>
> v7 [1]:
> - change the interface to separately provide the timeout. This is
> useful for supporting WFET and similar primitives which can do
> timed waiting (suggested by Arnd Bergmann).
>
> - Adapting rqspinlock code to this changed interface also
> necessitated allowing time_expr to fail.
> - rqspinlock changes to adapt to the new smp_cond_load_acquire_timeout().
>
> - add WFET support (suggested by Arnd Bergmann).
> - add support for atomic-long wrappers.
> - add a new scheduler interface tif_need_resched_relaxed_wait() which
> encapsulates the polling logic used by poll_idle().
> - interface suggested by (Rafael J. Wysocki).
>
>
> v6 [2]:
> - fixup missing timeout parameters in atomic64_cond_read_*_timeout()
> - remove a race between setting of TIF_NEED_RESCHED and the call to
> smp_cond_load_relaxed_timeout(). This would mean that dev->poll_time_limit
> would be set even if we hadn't spent any time waiting.
> (The original check compared against local_clock(), which would have been
> fine, but I was instead using a cheaper check against _TIF_NEED_RESCHED.)
> (Both from meta-CI bot)
>
>
> v5 [3]:
> - use cpu_poll_relax() instead of cpu_relax().
> - instead of defining an arm64 specific
> smp_cond_load_relaxed_timeout(), just define the appropriate
> cpu_poll_relax().
> - re-read the target pointer when we exit due to the time-check.
> - s/SMP_TIMEOUT_SPIN_COUNT/SMP_TIMEOUT_POLL_COUNT/
> (Suggested by Will Deacon)
>
> - add atomic_cond_read_*_timeout() and atomic64_cond_read_*_timeout()
> interfaces.
> - rqspinlock: use atomic_cond_read_acquire_timeout().
> - cpuidle: use smp_cond_load_relaxed_timeout() for polling.
> (Suggested by Catalin Marinas)
>
> - rqspinlock: define SMP_TIMEOUT_POLL_COUNT to be 16k for non arm64
>
>
> v4 [4]:
> - naming change 's/timewait/timeout/'
> - resilient spinlocks: get rid of res_smp_cond_load_acquire_waiting()
> and fixup use of RES_CHECK_TIMEOUT().
> (Both suggested by Catalin Marinas)
>
> v3 [5]:
> - further interface simplifications (suggested by Catalin Marinas)
>
> v2 [6]:
> - simplified the interface (suggested by Catalin Marinas)
> - get rid of wait_policy, and a multitude of constants
> - adds a slack parameter
> This helped remove a fair amount of duplicated code and, in
> hindsight, unnecessary constants.
>
> v1 [7]:
> - add wait_policy (coarse and fine)
> - derive spin-count etc at runtime instead of using arbitrary
> constants.
>
> Haris Okanovic tested v4 of this series with poll_idle()/haltpoll patches. [8]
>
> Comments appreciated!
>
> Thanks
> Ankur
>
> [0] https://lore.kernel.org/lkml/20251215044919.460086-1-ankur.a.arora@oracle.com/
> [1] https://lore.kernel.org/lkml/20251028053136.692462-1-ankur.a.arora@oracle.com/
> [2] https://lore.kernel.org/lkml/20250911034655.3916002-1-ankur.a.arora@oracle.com/
> [3] https://lore.kernel.org/lkml/20250911034655.3916002-1-ankur.a.arora@oracle.com/
> [4] https://lore.kernel.org/lkml/20250829080735.3598416-1-ankur.a.arora@oracle.com/
> [5] https://lore.kernel.org/lkml/20250627044805.945491-1-ankur.a.arora@oracle.com/
> [6] https://lore.kernel.org/lkml/20250502085223.1316925-1-ankur.a.arora@oracle.com/
> [7] https://lore.kernel.org/lkml/20250203214911.898276-1-ankur.a.arora@oracle.com/
> [8] https://lore.kernel.org/lkml/2cecbf7fb23ee83a4ce027e1be3f46f97efd585c.camel@amazon.com/
> [9] https://lore.kernel.org/lkml/20260209023153.2661784-1-ankur.a.arora@oracle.com/
>
> Cc: Arnd Bergmann <arnd@arndb.de>
> Cc: Will Deacon <will@kernel.org>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: "Rafael J. Wysocki" <rafael@kernel.org>
> Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
> Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: bpf@vger.kernel.org
> Cc: linux-arch@vger.kernel.org
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linux-pm@vger.kernel.org
>
> Ankur Arora (12):
> asm-generic: barrier: Add smp_cond_load_relaxed_timeout()
> arm64: barrier: Support smp_cond_load_relaxed_timeout()
> arm64/delay: move some constants out to a separate header
> arm64: support WFET in smp_cond_load_relaxed_timeout()
> arm64: rqspinlock: Remove private copy of
> smp_cond_load_acquire_timewait()
> asm-generic: barrier: Add smp_cond_load_acquire_timeout()
> atomic: Add atomic_cond_read_*_timeout()
> locking/atomic: scripts: build atomic_long_cond_read_*_timeout()
> bpf/rqspinlock: switch check_timeout() to a clock interface
> bpf/rqspinlock: Use smp_cond_load_acquire_timeout()
> sched: add need-resched timed wait interface
> cpuidle/poll_state: Wait for need-resched via
> tif_need_resched_relaxed_wait()
>
> Documentation/atomic_t.txt | 14 +++--
> arch/arm64/Kconfig | 3 +
> arch/arm64/include/asm/barrier.h | 23 +++++++
> arch/arm64/include/asm/cmpxchg.h | 62 +++++++++++++++----
> arch/arm64/include/asm/delay-const.h | 27 +++++++++
> arch/arm64/include/asm/rqspinlock.h | 85 --------------------------
> arch/arm64/lib/delay.c | 15 ++---
> drivers/cpuidle/poll_state.c | 21 +------
> drivers/soc/qcom/rpmh-rsc.c | 8 +--
> include/asm-generic/barrier.h | 90 ++++++++++++++++++++++++++++
> include/linux/atomic.h | 10 ++++
> include/linux/atomic/atomic-long.h | 18 +++---
> include/linux/sched/idle.h | 29 +++++++++
> kernel/bpf/rqspinlock.c | 77 +++++++++++++++---------
> scripts/atomic/gen-atomic-long.sh | 16 +++--
> 15 files changed, 320 insertions(+), 178 deletions(-)
> create mode 100644 arch/arm64/include/asm/delay-const.h
>
> --
> 2.31.1
>
--
Regards,
Haris Okanovic
AWS Graviton Software
^ permalink raw reply
* Re: [PATCH v3 0/3] thermal: spacemit: Add support for SpacemiT K1 SoC thermal sensor
From: Gong Shuai @ 2026-04-03 16:28 UTC (permalink / raw)
To: Vincent Legoll, alex, aou, conor+dt, daniel.lezcano, devicetree,
dlan, krzk+dt, krzysztof.kozlowski, linux-kernel, linux-pm,
linux-riscv, lukasz.luba, p.zabel, palmer, pjw, rafael, robh,
rui.zhang, spacemit
In-Reply-To: <36cca49c-0cb4-42b9-87ae-702df825a4a4@online.fr>
Hi Vincent,
On 4/3/2026 8:57 PM, Vincent Legoll wrote:
> Hello,
>
>
> I applied this series on top of:
>
> - next-20260402
>
> -
>
https://patchwork.kernel.org/project/linux-riscv/patch/20260330-orangepi-sd-card-uhs-v5-9-bd853604322d@gmail.com/
>
> I need the SD card series to boot from SD card.
>
You're absolutely right! We do need the SD series patches.
I'm actually building the mainline kernel using meta-riscv, and its
default config
already includes those patches. I completely forgot about that.
Thanks.
>
> I'm also seeing the same "it works" state as Gong Shuai.
>
> So you can add another:
>
> Tested-by: Vincent Legoll <legoll@online.fr> # OrangePi-RV2
>
> Regards
>
> Thanks
>
^ permalink raw reply
* Re: [PATCH v3 0/2] Support BPF traversal of wakeup sources
From: Samuel Wu @ 2026-04-03 16:28 UTC (permalink / raw)
To: Greg Kroah-Hartman
Cc: Rafael J. Wysocki, Pavel Machek, Len Brown, Danilo Krummrich,
Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Kumar Kartikeya Dwivedi,
Song Liu, Yonghong Song, Jiri Olsa, Shuah Khan, kernel-team,
linux-kernel, linux-pm, driver-core, bpf, linux-kselftest
In-Reply-To: <2026040357-undefined-gave-c98e@gregkh>
On Fri, Apr 3, 2026 at 3:04 AM Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
>
> On Thu, Apr 02, 2026 at 12:37:12PM -0700, Samuel Wu wrote:
> > On Wed, Apr 1, 2026 at 9:06 PM Greg Kroah-Hartman
> > <gregkh@linuxfoundation.org> wrote:
> > >
> > > On Wed, Apr 01, 2026 at 12:07:12PM -0700, Samuel Wu wrote:
> > > > On Wed, Apr 1, 2026 at 2:15 AM Greg Kroah-Hartman
> > > > <gregkh@linuxfoundation.org> wrote:
> > > > >
> > > > > On Tue, Mar 31, 2026 at 08:34:09AM -0700, Samuel Wu wrote:
[ ... ]
> > The data is fundamental for debugging and improving power at scale.
> > The original discussion and patch [1] provide more context of the
> > intent. To summarize the history, debugfs was unstable and insecure,
> > leading to the current sysfs implementation. However, sysfs has the
> > constraint of one attribute per node, requiring 10 sysfs accesses per
> > wakeup source.
>
> Ok, as the sysfs api doesn't work for your use case anymore, why do we need
> to keep it around at all?
>
> > That said, I completely agree that reading 1500+ sysfs files at once
> > is unreasonable. Perhaps the sysfs approach was manageable at the time
> > of [1], but moving forward we need a more scalable solution. This is
> > the main motivator and makes BPF the sane approach, as it improves
> > traversal in nearly every aspect (e.g. cycles, memory, simplicity,
> > scalability).
>
> I'm all for making this more scalable and work for your systems now, but
> consider if you could drop the sysfs api entirely, would you want this
> to be a different type of api entirely instead of having to plug through
> these using ebpf?
Almost all use cases want all this data at once, so AFAICT BPF offers
the best performance for that. But of course, open to discussion if
there is an alternative API that matches BPF's performance for this
use case.
I'm not opposed to dropping the sysfs approach, and I attempted to do
so in the v1 patch [1]. I'm not sure who else currently uses those
sysfs nodes, but a config flag should remove friction and could be a
stepping stone toward deprecation/removal.
[1]: https://lore.kernel.org/all/20260320160055.4114055-3-wusamuel@google.com/
Thanks!
-- Sam
^ permalink raw reply
* Re: [PATCH v2 1/2] cpuidle: governors: menu: Refine stopped tick handling
From: Ionut Nechita (Wind River) @ 2026-04-03 17:07 UTC (permalink / raw)
To: rafael
Cc: aboorvad, christian.loehle, dsmythies, linux-kernel, linux-pm,
sunlightlinux
In-Reply-To: <3341782.5fSG56mABF@rafael.j.wysocki>
On Mon, 23 Feb 2026 16:38:55 +0100, Rafael J. Wysocki wrote:
> Update the menu governor in accordance with the above and use twice
> the tick period length as the "safe timer range" for allowing the
> original predicted_ns value to be used even if the tick has been
> stopped.
Tested this on 6.12.79-rt17 with isolated CPUs (nohz_full=1-16,
isolcpus=nohz,domain,managed_irq,1-16) on Intel Xeon Gold 6338N.
cyclictest --priority 95 --nsecs --duration 600 --affinity 1-15
--threads 15 --mainaffinity 0
Before (6.12.79-rt17 without patch):
Avg: ~1780ns, Max T:3-T:8: 9300-9700ns
After (6.12.79-rt17 + this patch):
Avg: ~1790ns, Max T:3-T:14: 5200-6100ns
The patch reduces worst-case latency on threads T:3-T:14 from
~9500ns to ~5800ns on isolated CPUs with nohz_full. T:0-T:2 still
show occasional higher spikes (9400-10700ns) but the overall tail
latency improvement is clear.
Tested-by: Ionut Nechita <sunlightlinux@gmail.com>
^ permalink raw reply
* Re: [PATCH] PM / wakeup: Register class wakeup in pure_initcall phase
From: Greg Kroah-Hartman @ 2026-04-03 17:24 UTC (permalink / raw)
To: Heiner Kallweit
Cc: Rafael J. Wysocki, Pavel Machek, Len Brown, Danilo Krummrich,
Linux PM, driver-core
In-Reply-To: <ac269a91-0ba0-4798-acc6-ec294926bbb0@gmail.com>
On Fri, Apr 03, 2026 at 06:09:59PM +0200, Heiner Kallweit wrote:
> Wakeup sources (e.g. autosleep) can be created as early as core_initcall.
> Class wakeup is registered in postcore_initcall only, which results in the
> autosleep wakeup source not being shown in sysfs. To fix this,
> register class wakeup in pure_initcall phase already.
>
> The current behavior doesn't cause any known issue, therefore treat
> the change as an improvement.
>
> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
> ---
> drivers/base/power/wakeup_stats.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/base/power/wakeup_stats.c b/drivers/base/power/wakeup_stats.c
> index 308f8bde9..30f0d3d61 100644
> --- a/drivers/base/power/wakeup_stats.c
> +++ b/drivers/base/power/wakeup_stats.c
> @@ -216,4 +216,4 @@ static int __init wakeup_sources_sysfs_init(void)
>
> return PTR_ERR_OR_ZERO(wakeup_class);
> }
> -postcore_initcall(wakeup_sources_sysfs_init);
> +pure_initcall(wakeup_sources_sysfs_init);
> --
> 2.53.0
>
Wait, if this doesn't need to be changed, we shouldn't change it. Unless
you have a bug that is being fixed here, don't touch init call levels.
They are tricky and will come to bite you if you don't watch out.
thanks,
greg k-h
^ permalink raw reply
* [PATCH v9 0/3] PCI: Add support for PCIe WAKE# interrupt
From: Krishna Chaitanya Chundru @ 2026-04-03 17:33 UTC (permalink / raw)
To: Rafael J. Wysocki, Len Brown, Pavel Machek, Greg Kroah-Hartman,
Danilo Krummrich, Bjorn Helgaas, Bartosz Golaszewski,
Linus Walleij, Bartosz Golaszewski, Rob Herring, Saravana Kannan,
Linus Walleij
Cc: linux-pm, linux-kernel, linux-pci, linux-gpio, quic_vbadigan,
sherry.sun, driver-core, devicetree, Krishna Chaitanya Chundru,
Manivannan Sadhasivam, Bartosz Golaszewski
PCIe WAKE# interrupt is needed for bringing back PCIe device state from
D3cold to D0.
This has been pending for a long time; there were two previous attempts to
add WAKE# support [1], [2]. Those series tried to add support for legacy
interrupts along with WAKE#. Legacy interrupts are already available in
the latest kernel and we can ignore them. For the wake IRQ, those series
tried to use the interrupts property defined in the device tree.
This series uses the gpio property instead of interrupts; the dedicated
IRQ is obtained from the GPIO descriptor.
Bjorn,
Can you take this series through PCI branch, once other subsystem
maintainers give us ACK.
WAKE# is added in dts schema and merged based on this patch.
https://lore.kernel.org/all/20250515090517.3506772-1-krishna.chundru@oss.qualcomm.com/
[1]: https://lore.kernel.org/all/b2b91240-95fe-145d-502c-d52225497a34@nvidia.com/T/
[2]: https://lore.kernel.org/all/20171226023646.17722-1-jeffy.chen@rock-chips.com/
Signed-off-by: Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
---
Changes in v9:
- Call device_init_wakeup() only if
dev_pm_set_dedicated_shared_wake_irq() succeeds (Mani).
- Change the IRQ_TYPE from IRQ_TYPE_EDGE_FALLING to IRQ_TYPE_LEVEL_LOW (Mani).
- Link to v8: https://lore.kernel.org/r/20260313-wakeirq_support-v8-0-48a0a702518a@oss.qualcomm.com
Changes in v8:
- Moved the stub functions under CONFIG_OF_IRQ(mani).
- Added the description of how dev_pm_set_dedicated_shared_wake_irq()
works.
- Link to v7: https://lore.kernel.org/r/20260218-wakeirq_support-v7-0-0d4689830207@oss.qualcomm.com
Changes in v7:
- Updated the commit text (Mani).
- Couple of nits like using pci_err instead of dev_err,
use platform_pci_configure_wake(), platform_pci_remove_wake() instead
of directly calling pci_configure_of_wake_gpio() & pci_remove_of_wake_gpio() etc (Mani).
- Add a new fwnode_gpiod_get() API that wraps fwnode_gpiod_get_index(..0..), similar to
devm_fwnode_gpiod_get() (Mani).
- Link to v6: https://lore.kernel.org/r/20251127-wakeirq_support-v6-0-60f581f94205@oss.qualcomm.com
Changes in v6:
- Change the name to dev_pm_set_dedicated_shared_wake_irq() and make the
changes pointed by (Rafael).
- Link to v5: https://lore.kernel.org/r/20251107-wakeirq_support-v5-0-464e17f2c20c@oss.qualcomm.com
Changes in v5:
- Enable WAKE# irq only when there is wake-gpios defined in its device
tree node (Bjorn).
- For legacy bindings for direct attach, check in the root port if we haven't
found the wake in the endpoint node.
- Instead of hooking wake in driver bound case, do it in the framework
irrespective of the driver state (Bjorn).
- Link to v4: https://lore.kernel.org/r/20250801-wake_irq_support-v4-0-6b6639013a1a@oss.qualcomm.com
Changes in v4:
- Move wake from portdrv to core framework to endpoint (Bjorn).
- Added support for multiple WAKE# case (Bjorn). But traverse from
endpoint upstream port to root port till you get WAKE#. And use
IRQF_SHARED flag for requesting interrupts.
- Link to v3: https://lore.kernel.org/r/20250605-wake_irq_support-v3-0-7ba56dc909a5@oss.qualcomm.com
Changes in v3:
- Update the commit messages, function names etc as suggested by Mani.
- return wake_irq if returns error (Neil).
- Link to v2: https://lore.kernel.org/r/20250419-wake_irq_support-v2-0-06baed9a87a1@oss.qualcomm.com
Changes in v2:
- Move the wake irq teardown after pcie_port_device_remove
and move of_pci_setup_wake_irq before pcie_link_rcec (Lukas)
- teardown wake irq in shutdown also.
- Link to v1: https://lore.kernel.org/r/20250401-wake_irq_support-v1-0-d2e22f4a0efd@oss.qualcomm.com
---
Krishna Chaitanya Chundru (3):
PM: sleep: wakeirq: Add support for dedicated shared wake IRQ setup
gpio: Add fwnode_gpiod_get() helper
PCI: Add support for PCIe WAKE# interrupt
drivers/base/power/wakeirq.c | 39 ++++++++++++++++++++---
drivers/pci/of.c | 74 +++++++++++++++++++++++++++++++++++++++++++
drivers/pci/pci.c | 10 ++++++
drivers/pci/pci.h | 2 ++
drivers/pci/probe.c | 2 ++
drivers/pci/remove.c | 1 +
include/linux/gpio/consumer.h | 9 ++++++
include/linux/of_pci.h | 4 +++
include/linux/pci.h | 2 ++
include/linux/pm_wakeirq.h | 6 ++++
10 files changed, 144 insertions(+), 5 deletions(-)
---
base-commit: d8a9a4b11a137909e306e50346148fc5c3b63f9d
change-id: 20251104-wakeirq_support-f54c4baa18c5
Best regards,
--
Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
^ permalink raw reply
* [PATCH v9 1/3] PM: sleep: wakeirq: Add support for dedicated shared wake IRQ setup
From: Krishna Chaitanya Chundru @ 2026-04-03 17:33 UTC (permalink / raw)
To: Rafael J. Wysocki, Len Brown, Pavel Machek, Greg Kroah-Hartman,
Danilo Krummrich, Bjorn Helgaas, Bartosz Golaszewski,
Linus Walleij, Bartosz Golaszewski, Rob Herring, Saravana Kannan,
Linus Walleij
Cc: linux-pm, linux-kernel, linux-pci, linux-gpio, quic_vbadigan,
sherry.sun, driver-core, devicetree, Krishna Chaitanya Chundru
In-Reply-To: <20260403-wakeirq_support-v9-0-1cbecf3b58d7@oss.qualcomm.com>
Some devices require more flexibility when configuring their dedicated
wake-up interrupts, such as support for IRQF_SHARED or other IRQ flags.
This is particularly useful in PCIe systems where multiple endpoints
(e.g., Wi-Fi and Bluetooth controllers) share a common WAKE# signal
line which requests platform to re-establish power and reference clocks
to the components. In such cases, drivers can use this new API
dev_pm_set_dedicated_shared_wake_irq() to register a shared wake IRQ.
Update the internal helper __dev_pm_set_dedicated_wake_irq() to accept an
irq_flags argument. Modify the existing dev_pm_set_dedicated_wake_irq()
and dev_pm_set_dedicated_wake_irq_reverse() to preserve current behavior.
When an IRQ is registered with IRQF_SHARED, the IRQF_NO_AUTOEN flag can't
be used, so disable the IRQ explicitly after registering it.
Signed-off-by: Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
Acked-by: Rafael J. Wysocki (Intel) <rafael@kernel.org>
---
drivers/base/power/wakeirq.c | 39 ++++++++++++++++++++++++++++++++++-----
include/linux/pm_wakeirq.h | 6 ++++++
2 files changed, 40 insertions(+), 5 deletions(-)
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index ad23f0fa5d1a5a9eb49b1af2288ee4908082b13e..b7b106f55559a7c85cb35d9e5ed22fe37970662d 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -171,7 +171,8 @@ static irqreturn_t handle_threaded_wake_irq(int irq, void *_wirq)
return IRQ_HANDLED;
}
-static int __dev_pm_set_dedicated_wake_irq(struct device *dev, int irq, unsigned int flag)
+static int __dev_pm_set_dedicated_wake_irq(struct device *dev, int irq, unsigned int flag,
+ unsigned int irq_flags)
{
struct wake_irq *wirq;
int err;
@@ -200,8 +201,7 @@ static int __dev_pm_set_dedicated_wake_irq(struct device *dev, int irq, unsigned
* so we use a threaded irq.
*/
err = request_threaded_irq(irq, NULL, handle_threaded_wake_irq,
- IRQF_ONESHOT | IRQF_NO_AUTOEN,
- wirq->name, wirq);
+ IRQF_ONESHOT | irq_flags, wirq->name, wirq);
if (err)
goto err_free_name;
@@ -237,7 +237,7 @@ static int __dev_pm_set_dedicated_wake_irq(struct device *dev, int irq, unsigned
*/
int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
{
- return __dev_pm_set_dedicated_wake_irq(dev, irq, 0);
+ return __dev_pm_set_dedicated_wake_irq(dev, irq, 0, IRQF_NO_AUTOEN);
}
EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq);
@@ -258,10 +258,39 @@ EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq);
*/
int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq)
{
- return __dev_pm_set_dedicated_wake_irq(dev, irq, WAKE_IRQ_DEDICATED_REVERSE);
+ return __dev_pm_set_dedicated_wake_irq(dev, irq, WAKE_IRQ_DEDICATED_REVERSE,
+ IRQF_NO_AUTOEN);
}
EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq_reverse);
+/**
+ * dev_pm_set_dedicated_shared_wake_irq - Request a dedicated shared wake-up interrupt
+ * @dev: Device entry
+ * @irq: Device wake-up interrupt
+ * @flags: Custom IRQ flags (e.g., IRQ_TYPE_EDGE_FALLING)
+ *
+ * This API sets up a threaded interrupt handler for a device that has
+ * a shared wake-up interrupt in addition to the device IO interrupt. It also
+ * sets IRQ flags like IRQ_TYPE_EDGE_FALLING passed by the caller.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int dev_pm_set_dedicated_shared_wake_irq(struct device *dev, int irq, unsigned long flags)
+{
+ struct wake_irq *wirq;
+ int ret;
+
+ ret = __dev_pm_set_dedicated_wake_irq(dev, irq, 0, IRQF_SHARED | flags);
+ if (ret)
+ return ret;
+
+ wirq = dev->power.wakeirq;
+ disable_irq_nosync(wirq->irq);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_shared_wake_irq);
+
/**
* dev_pm_enable_wake_irq_check - Checks and enables wake-up interrupt
* @dev: Device
diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h
index 25b63ed51b765c2c6919f259668a12675330835e..61f1e840745b56baa57db37563e450cb2d757a85 100644
--- a/include/linux/pm_wakeirq.h
+++ b/include/linux/pm_wakeirq.h
@@ -11,6 +11,7 @@ extern int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq);
extern int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq);
extern void dev_pm_clear_wake_irq(struct device *dev);
extern int devm_pm_set_wake_irq(struct device *dev, int irq);
+extern int dev_pm_set_dedicated_shared_wake_irq(struct device *dev, int irq, unsigned long flags);
#else /* !CONFIG_PM */
@@ -38,5 +39,10 @@ static inline int devm_pm_set_wake_irq(struct device *dev, int irq)
return 0;
}
+static inline int dev_pm_set_dedicated_shared_wake_irq(struct device *dev,
+ int irq, unsigned long flags)
+{
+ return 0;
+}
#endif /* CONFIG_PM */
#endif /* _LINUX_PM_WAKEIRQ_H */
--
2.34.1
^ permalink raw reply related
* [PATCH v9 2/3] gpio: Add fwnode_gpiod_get() helper
From: Krishna Chaitanya Chundru @ 2026-04-03 17:33 UTC (permalink / raw)
To: Rafael J. Wysocki, Len Brown, Pavel Machek, Greg Kroah-Hartman,
Danilo Krummrich, Bjorn Helgaas, Bartosz Golaszewski,
Linus Walleij, Bartosz Golaszewski, Rob Herring, Saravana Kannan,
Linus Walleij
Cc: linux-pm, linux-kernel, linux-pci, linux-gpio, quic_vbadigan,
sherry.sun, driver-core, devicetree, Krishna Chaitanya Chundru,
Manivannan Sadhasivam, Bartosz Golaszewski
In-Reply-To: <20260403-wakeirq_support-v9-0-1cbecf3b58d7@oss.qualcomm.com>
Add fwnode_gpiod_get() as a convenience wrapper around
fwnode_gpiod_get_index() for the common case where only the
first GPIO is required.
This mirrors existing gpiod_get() and devm_gpiod_get() helpers
and avoids open-coding index 0 at call sites.
Suggested-by: Manivannan Sadhasivam <mani@kernel.org>
Acked-by: Manivannan Sadhasivam <mani@kernel.org>
Reviewed-by: Linus Walleij <linusw@kernel.org>
Acked-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
Signed-off-by: Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
---
include/linux/gpio/consumer.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 0d8408582918680bfea6a04ccedfc0c75211907a..fee926c0262ce9dc4b9a3c151e74f2cf37470a49 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -596,6 +596,15 @@ static inline int gpiod_disable_hw_timestamp_ns(struct gpio_desc *desc,
}
#endif /* CONFIG_GPIOLIB && CONFIG_HTE */
+static inline
+struct gpio_desc *fwnode_gpiod_get(struct fwnode_handle *fwnode,
+ const char *con_id,
+ enum gpiod_flags flags,
+ const char *label)
+{
+ return fwnode_gpiod_get_index(fwnode, con_id, 0, flags, label);
+}
+
static inline
struct gpio_desc *devm_fwnode_gpiod_get(struct device *dev,
struct fwnode_handle *fwnode,
--
2.34.1
^ permalink raw reply related
* [PATCH v9 3/3] PCI: Add support for PCIe WAKE# interrupt
From: Krishna Chaitanya Chundru @ 2026-04-03 17:33 UTC (permalink / raw)
To: Rafael J. Wysocki, Len Brown, Pavel Machek, Greg Kroah-Hartman,
Danilo Krummrich, Bjorn Helgaas, Bartosz Golaszewski,
Linus Walleij, Bartosz Golaszewski, Rob Herring, Saravana Kannan,
Linus Walleij
Cc: linux-pm, linux-kernel, linux-pci, linux-gpio, quic_vbadigan,
sherry.sun, driver-core, devicetree, Krishna Chaitanya Chundru
In-Reply-To: <20260403-wakeirq_support-v9-0-1cbecf3b58d7@oss.qualcomm.com>
According to the PCI Express specification (PCIe r7.0, Section 5.3.3.2),
two link wakeup mechanisms are defined: Beacon and WAKE#. Beacon is a
hardware-only mechanism and is invisible to software (PCIe r7.0,
Section 4.2.7.8.1). This change adds support for the WAKE# mechanism in
the PCI core.
According to the PCIe specification, multiple WAKE# signals can exist in
a system or each component in the hierarchy could share a single WAKE#
signal. In configurations involving a PCIe switch, each downstream port
(DSP) of the switch may be connected to a separate WAKE# line, allowing
each endpoint to signal WAKE# independently. From figure 5.4 in sec
5.3.3.2, WAKE# can also be terminated at the switch itself. To support
this, the WAKE# should be described in the device tree node of the
endpoint/bridge. If all endpoints share a single WAKE# line, then each
endpoint node should describe the same WAKE# signal or a single WAKE# in
the Root Port node.
In pci_device_add(), the PCI framework searches for WAKE# in the device
node. If it is not found there, it searches the upstream port, but only if
that port is a Root Port. Once found, it registers the wake IRQ in shared
mode, as WAKE# may be shared among multiple endpoints.
dev_pm_set_dedicated_shared_wake_irq() associates a wakeup IRQ with a
device and requests it, but the PM core keeps the IRQ disabled by default.
The IRQ is enabled only when the device is permitted to wake the system,
i.e. during system suspend and after runtime suspend, and only when device
wakeup is enabled.
When the wake IRQ fires, the wakeirq handler invokes pm_runtime_resume() to
bring the device back to an active power state, such as transitioning from
D3cold to D0. Once the device is active and the link is usable, the
endpoint may generate a PME, which is then handled by the PCI core through
PME polling or the PCIe PME service driver to complete the wakeup of the
endpoint.
WAKE# is added in dts schema and merged based on below links.
Link: https://lore.kernel.org/all/20250515090517.3506772-1-krishna.chundru@oss.qualcomm.com/
Link: https://github.com/devicetree-org/dt-schema/pull/170
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
---
drivers/pci/of.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++
drivers/pci/pci.c | 10 +++++++
drivers/pci/pci.h | 2 ++
drivers/pci/probe.c | 2 ++
drivers/pci/remove.c | 1 +
include/linux/of_pci.h | 4 +++
include/linux/pci.h | 2 ++
7 files changed, 95 insertions(+)
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index 9f8eb5df279ed28db7a3b2fd29c65da9975c2efa..1678e82962b78ac206829a3a1fc121b0142b993b 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -7,6 +7,7 @@
#define pr_fmt(fmt) "PCI: OF: " fmt
#include <linux/cleanup.h>
+#include <linux/gpio/consumer.h>
#include <linux/irqdomain.h>
#include <linux/kernel.h>
#include <linux/pci.h>
@@ -15,6 +16,7 @@
#include <linux/of_address.h>
#include <linux/of_pci.h>
#include <linux/platform_device.h>
+#include <linux/pm_wakeirq.h>
#include "pci.h"
#ifdef CONFIG_PCI
@@ -586,6 +588,78 @@ int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin)
return irq_create_of_mapping(&oirq);
}
EXPORT_SYMBOL_GPL(of_irq_parse_and_map_pci);
+
+static void pci_configure_wake_irq(struct pci_dev *pdev, struct gpio_desc *wake)
+{
+ int ret, wake_irq;
+
+ wake_irq = gpiod_to_irq(wake);
+ if (wake_irq < 0) {
+ pci_err(pdev, "Failed to get wake irq: %d\n", wake_irq);
+ return;
+ }
+
+ /*
+ * dev_pm_set_dedicated_shared_wake_irq() associates a wakeup IRQ with the
+ * device and requests it, but the PM core keeps it disabled by default.
+ * The IRQ is enabled only when the device is allowed to wake the system
+ * (during system suspend and after runtime suspend), and only if device
+ * wakeup is enabled.
+ *
+ * When the wake IRQ fires, the wakeirq handler invokes pm_runtime_resume()
+ * to bring the device back to an active power state (e.g. from D3cold to D0).
+ * Once the device is active and the link is usable, the endpoint may signal
+ * a PME, which is then handled by the PCI core (either via PME polling or the
+ * PCIe PME service driver) to wakeup particular endpoint.
+ */
+ ret = dev_pm_set_dedicated_shared_wake_irq(&pdev->dev, wake_irq,
+ IRQ_TYPE_LEVEL_LOW);
+ if (ret < 0) {
+ pci_err(pdev, "Failed to set WAKE# IRQ: %d\n", ret);
+ return;
+ }
+
+ device_init_wakeup(&pdev->dev, true);
+}
+
+void pci_configure_of_wake_gpio(struct pci_dev *dev)
+{
+ struct device_node *dn = pci_device_to_OF_node(dev);
+ struct pci_dev *upstream;
+ struct gpio_desc *gpio;
+
+ if (!dn)
+ return;
+
+ /*
+ * The devices in a hierarchy expose wakeup capability through the 'wake-gpios'
+ * property defined either in the device node or in the Slot node. So first check
+ * for the property in device node and if not available, check in the Slot node.
+ */
+ gpio = fwnode_gpiod_get(of_fwnode_handle(dn), "wake",
+ GPIOD_IN | GPIOD_FLAGS_BIT_NONEXCLUSIVE, NULL);
+ if (IS_ERR(gpio)) {
+ upstream = pci_upstream_bridge(dev);
+ if (upstream && pci_is_root_bus(upstream->bus) && upstream->wake)
+ pci_configure_wake_irq(dev, upstream->wake);
+ } else {
+ dev->wake = gpio;
+ pci_configure_wake_irq(dev, gpio);
+ }
+}
+
+void pci_remove_of_wake_gpio(struct pci_dev *dev)
+{
+ struct device_node *dn = pci_device_to_OF_node(dev);
+
+ if (!dn)
+ return;
+
+ dev_pm_clear_wake_irq(&dev->dev);
+ device_init_wakeup(&dev->dev, false);
+ gpiod_put(dev->wake);
+ dev->wake = NULL;
+}
#endif /* CONFIG_OF_IRQ */
static int pci_parse_request_of_pci_ranges(struct device *dev,
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 8479c2e1f74f1044416281aba11bf071ea89488a..3d858f36ab48a6daec645574ca9027d9d6f071de 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -17,6 +17,7 @@
#include <linux/lockdep.h>
#include <linux/msi.h>
#include <linux/of.h>
+#include <linux/of_pci.h>
#include <linux/pci.h>
#include <linux/pm.h>
#include <linux/slab.h>
@@ -1123,6 +1124,15 @@ static inline bool platform_pci_bridge_d3(struct pci_dev *dev)
return acpi_pci_bridge_d3(dev);
}
+void platform_pci_configure_wake(struct pci_dev *dev)
+{
+ return pci_configure_of_wake_gpio(dev);
+}
+
+void platform_pci_remove_wake(struct pci_dev *dev)
+{
+ return pci_remove_of_wake_gpio(dev);
+}
/**
* pci_update_current_state - Read power state of given device and cache it
* @dev: PCI device to handle.
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 13d998fbacce6698514d92500dfea03cc562cdc2..65ca9551e558d2e3331fab0a968620d6b2a2522a 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -282,6 +282,8 @@ void pci_msix_init(struct pci_dev *dev);
bool pci_bridge_d3_possible(struct pci_dev *dev);
void pci_bridge_d3_update(struct pci_dev *dev);
int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type);
+void platform_pci_configure_wake(struct pci_dev *dev);
+void platform_pci_remove_wake(struct pci_dev *dev);
static inline bool pci_bus_rrs_vendor_id(u32 l)
{
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index bccc7a4bdd794384b7877d453c7989941471c999..372b0d2f4531ea53c0570608306a547101d59e7b 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2771,6 +2771,8 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
/* Establish pdev->tsm for newly added (e.g. new SR-IOV VFs) */
pci_tsm_init(dev);
+ platform_pci_configure_wake(dev);
+
pci_npem_create(dev);
pci_doe_sysfs_init(dev);
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index e9d519993853f92f1810d3eff9f44ca7e3e1abd9..d781b41e57c4444077075690cec926a9fe15334f 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -35,6 +35,7 @@ static void pci_destroy_dev(struct pci_dev *dev)
if (pci_dev_test_and_set_removed(dev))
return;
+ platform_pci_remove_wake(dev);
pci_doe_sysfs_teardown(dev);
pci_npem_remove(dev);
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index 29658c0ee71ff10122760214d04ee2bab01709fd..0efd6e9cb4d3d3beaafb42ea411303139f1150d5 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -30,12 +30,16 @@ static inline void of_pci_check_probe_only(void) { }
#if IS_ENABLED(CONFIG_OF_IRQ)
int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin);
+void pci_configure_of_wake_gpio(struct pci_dev *dev);
+void pci_remove_of_wake_gpio(struct pci_dev *dev);
#else
static inline int
of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin)
{
return 0;
}
+static inline void pci_configure_of_wake_gpio(struct pci_dev *dev) { }
+static inline void pci_remove_of_wake_gpio(struct pci_dev *dev) { }
#endif
#endif
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1c270f1d512301de4d462fe7e5097c32af5c6f8d..d1e08df8a8deaa87780589f23242767fdcdba541 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -586,6 +586,8 @@ struct pci_dev {
/* These methods index pci_reset_fn_methods[] */
u8 reset_methods[PCI_NUM_RESET_METHODS]; /* In priority order */
+ struct gpio_desc *wake; /* Holds WAKE# gpio */
+
#ifdef CONFIG_PCIE_TPH
u16 tph_cap; /* TPH capability offset */
u8 tph_mode; /* TPH mode */
--
2.34.1
^ permalink raw reply related
* Re: [PATCH v20 06/10] power: reset: Add psci-reboot-mode driver
From: Shivendra Pratap @ 2026-04-03 17:45 UTC (permalink / raw)
To: Lorenzo Pieralisi
Cc: Arnd Bergmann, Bjorn Andersson, Sebastian Reichel, Rob Herring,
Souvik Chakravarty, Krzysztof Kozlowski, Andy Yan,
Matthias Brugger, Mark Rutland, Conor Dooley, Konrad Dybcio,
John Stultz, Moritz Fischer, Bartosz Golaszewski, Sudeep Holla,
Florian Fainelli, Krzysztof Kozlowski, Dmitry Baryshkov,
Mukesh Ojha, Andre Draszik, Kathiravan Thirumoorthy, linux-pm,
linux-kernel, linux-arm-kernel, linux-arm-msm, devicetree,
Srinivas Kandagatla
In-Reply-To: <ac/hru3IIiU0+Lp9@lpieralisi>
On 03-04-2026 21:20, Lorenzo Pieralisi wrote:
> On Fri, Apr 03, 2026 at 12:05:27AM +0530, Shivendra Pratap wrote:
>>
>>
>> On 01-04-2026 20:07, Lorenzo Pieralisi wrote:
>>> On Tue, Mar 31, 2026 at 11:30:09PM +0530, Shivendra Pratap wrote:
>>>>
>>>>
>>>> On 27-03-2026 19:25, Lorenzo Pieralisi wrote:
>>>>> On Wed, Mar 04, 2026 at 11:33:06PM +0530, Shivendra Pratap wrote:
>>>>>> PSCI supports different types of resets like COLD reset, ARCH WARM
>>
>> [snip..]
>>
>>>>>> + * Predefined reboot-modes are defined as per the values
>>>>>> + * of enum reboot_mode defined in the kernel: reboot.c.
>>>>>> + */
>>>>>> +static struct mode_info psci_resets[] = {
>>>>>> + { .mode = "warm", .magic = REBOOT_WARM},
>>>>>> + { .mode = "soft", .magic = REBOOT_SOFT},
>>>>>> + { .mode = "cold", .magic = REBOOT_COLD},
>>>
>>> These strings match the command userspace issue right ? I think that we
>>> should make them match the corresponding PSCI reset types, the list above
>>> maps command to reboot_mode values and those can belong to any reboot
>>> mode driver to be honest they don't make much sense in a PSCI reboot
>>> mode driver only.
>>>
>>> It is a question for everyone here: would it make sense to make these
>>> predefined resets a set of strings, eg:
>>>
>>> psci-system-reset
>>> psci-system-reset2-arch-warm-reset
>>>
>>> and then vendor resets:
>>>
>>> psci-system-reset2-vendor-reset
>>
>> Can you share bit more details on this? We are already defining the string
>> from userspace in the struct - eg: ".mode = "warm".
>
> "warm","soft","cold" are not strictly speaking PSCI concepts and mean nothing
> well defined to user space and even if they did, they would not belong in
> the PSCI reboot mode driver but in generic code.
>
> Spelling out what a reset is might help instead, again, this is just my
> opinion, I don't know how the semantics of resets have been handled thus
> far.
>
> If userspace issues a LINUX_REBOOT_CMD_RESTART2 with arg, say,
> "psci-system-reset2-arch-warm-reset" it is pretty clear what it wants
> to do in PSCI.
ok. got it.
so it predef-modes.
reboot psci-system-reset2-arch-warm-reset =>goes for => ARCH WARM RESET.
etc..
>
> Again, it is a suggestion, comments welcome.
>
>> yes we can move away from enum reboot_mode and use custom psci defines one -
>> Ack.
>>
>>>
>>
>> [snip ..]
>>
>>>>>> +
>>>>>> +/*
>>>>>> + * arg1 is reset_type(Low 32 bit of magic).
>>>>>> + * arg2 is cookie(High 32 bit of magic).
>>>>>> + * If reset_type is 0, cookie will be used to decide the reset command.
>>>>>> + */
>>>>>> +static int psci_reboot_mode_write(struct reboot_mode_driver *reboot, u64 magic)
>>>>>> +{
>>>>>> + u32 reset_type = REBOOT_MODE_ARG1(magic);
>>>>>> + u32 cookie = REBOOT_MODE_ARG2(magic);
>>>>>> +
>>>>>> + if (reset_type == 0) {
>>>>>> + if (cookie == REBOOT_WARM || cookie == REBOOT_SOFT)
>>>>>> + psci_set_reset_cmd(true, 0, 0);
>>>>>> + else
>>>>>> + psci_set_reset_cmd(false, 0, 0);
>>>>>> + } else {
>>>>>> + psci_set_reset_cmd(true, reset_type, cookie);
>>>>>> + }
>>>>>
>>>>> I don't think that psci_set_reset_cmd() has the right interface (and this
>>>>> nested if is too complicated for my taste). All we need to pass is reset-type
>>>>> and cookie (and if the reset is one of the predefined ones, reset-type is 0
>>>>> and cookie is the REBOOT_* cookie).
>>>>>
>>>>> Then the PSCI firmware driver will take the action according to what
>>>>> resets are available.
>>>>>
>>>>> How does it sound ?
>>>>
>>>> So we mean these checks will move to the psci driver? Sorry for re-iterating
>>>> the question.
>>>
>>> Given what I say above, I believe that something we can do is mapping the magic
>>> to an enum like:
>>>
>>> PSCI_SYSTEM_RESET
>>> PSCI_SYSTEM_RESET2_ARCH_SYSTEM_WARM_RESET
>>> PSCI_SYSTEM_RESET2_VENDOR_RESET
>>>
>>> and can add a probe function into PSCI driver similar to psci_has_osi_support() but
>>> to probe for SYSTEM_RESET2 and initialize the predefined strings accordingly,
>>> depending on its presence.
>>
>> Not able to get it cleanly.
>>
>> 1. Will move away from reboot_mode enum for pre-defined modes and define new
>> enum defining these modes- fine.
>> 2. get SYSTEM_RESET2 is supported from psci exported function -- fine, but
>> how we use it here now, as we do not want to send the reset_cmd from
>> psci_set_reset_cmd now?
>
> You do keep psci_set_reset_cmd() but all it is used for is setting a struct
> shared with the PSCI driver where you initialize the enum above, possibly
> with a cookie if it is a vendor reset.
>
>> 3. For pre-defined modes, warm/soft or cold - reset_type and cookie, both
>> are zero, sys_reset2 or sys_reset decides the ARCH reset vs cold reset.
>> 4. For vendor-reset, we use sys_reset2 with reset_type and cookie.
>
> Yes.
Ack.
>> All above is done in reboot_notifier call at psci-reboot-mode.
>> --
>>
>> Now in the final restart_notifier->psci_sys_reset --
>>
>> If panic is in progress, we do not use any of the cmd based reset params and
>> go with the legacy reset. So we need to preserve the values that were set
>> from psci-reboot-mode.
>>
>> Did not understand the proposed suggestion in above usecase. Need more input
>> on this.
>
> I explained above. The reboot mode driver sets the command to carry out
> depending on the string coming from user space and whether PSCI supports
> SYSTEM_RESET2 or not.
got it. working on it. thanks.
>> --
>>
>> One other option is to have a restart_notifier in psci-reboot-mode, with
>> lesser priority than psci_sys_reset and then handle all the cases including
>> panic and sys_reset2.
>
> No.
Ack.
thanks,
Shivendra
^ permalink raw reply
* [GIT PULL] devfreq next for 7.1
From: Choi Chanwoo @ 2026-04-03 18:31 UTC (permalink / raw)
To: rafael
Cc: linux-kernel, linux-pm, myungjoo.ham, kyungmin.park, cw00.choi,
chanwoo
Dear Rafael,
This is the devfreq-next pull request. I have added a detailed description
of this pull request to the following tag. Please pull devfreq with the
following updates.
Best Regards,
Chanwoo Choi
The following changes since commit 7aaa8047eafd0bd628065b15757d9b48c5f9c07d:
Linux 7.0-rc6 (2026-03-29 15:40:00 -0700)
are available in the Git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/linux.git tags/devfreq-next-for-7.1
for you to fetch changes up to cd905830ea6184d6678386ce2d652bec324034d1:
PM / devfreq: tegra30-devfreq: add support for Tegra114 (2026-04-04 03:15:39 +0900)
----------------------------------------------------------------
Update devfreq next for v7.1
Detailed description for this pull request:
- Remove unneeded casting for HZ_PER_KHZ on devfreq.c
- Use _visible attribute to replace create/remove_sysfs_files() to fix sysfs
attribute race conditions on devfreq.c
- Add support for Tegra114 activity monitor device on tegra30-devfreq.c
----------------------------------------------------------------
Andy Shevchenko (1):
PM / devfreq: Remove unneeded casting for HZ_PER_KHZ
Pengjie Zhang (1):
PM / devfreq: use _visible attribute to replace create/remove_sysfs_files()
Svyatoslav Ryhel (1):
PM / devfreq: tegra30-devfreq: add support for Tegra114
drivers/devfreq/devfreq.c | 108 ++++++++++++++++++++++----------------
drivers/devfreq/tegra30-devfreq.c | 17 ++++--
2 files changed, 74 insertions(+), 51 deletions(-)
^ permalink raw reply
* Re: [GIT PULL] Thermal control fixes for v7.0-rc7
From: pr-tracker-bot @ 2026-04-03 19:52 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Linus Torvalds, Linux PM, Linux Kernel Mailing List,
Daniel Lezcano
In-Reply-To: <CAJZ5v0g23pA9urxBundpRAb0O67=UFSM29SDiudXNa2S18sBZA@mail.gmail.com>
The pull request you sent on Fri, 3 Apr 2026 14:32:41 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git thermal-7.0-rc7
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/576db0f37549a05d7b1d9b5d6ad9fcce9ad7bfd6
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
^ permalink raw reply
* Re: [GIT PULL] Power management fixes for v7.0-rc7
From: pr-tracker-bot @ 2026-04-03 19:52 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Linus Torvalds, Linux PM, Linux Kernel Mailing List, Viresh Kumar
In-Reply-To: <CAJZ5v0hh83q-F1Y-13a_3-AopnRRmhM91FCnNrMrKYwjqAiJcw@mail.gmail.com>
The pull request you sent on Fri, 3 Apr 2026 14:34:11 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git pm-7.0-rc7
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/1270605fd2d8c3d2f0a050f5078e56cbc9b755e5
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
^ permalink raw reply
* Re: [PATCH 1/2] pmdomain/rockchip: skip QoS operations for idle-only domains
From: Daniel Bozeman @ 2026-04-03 21:27 UTC (permalink / raw)
To: shawn.lin, finley.xiao, ulf.hansson, heiko, linux-pm,
linux-arm-kernel, linux-rockchip, linux-kernel
In-Reply-To: <fbf16b5a-1565-39c9-0d92-33a7a577212f@rock-chips.com>
I ran both tests you requested:
Test 1: Added pr_err to rockchip_pd_power_on/off to identify
the crashing domain. With patch 2 only (skip EPROBE_DEFER),
the crash occurs on PD_VO:
rockchip_pd_power_off: vo pwr_mask=0x0
Internal error: synchronous external abort: 0000000096000010
Workqueue: pm genpd_power_off_work_fn
Call trace:
regmap_mmio_read32le+0x8/0x20
_regmap_bus_reg_read+0x6c/0xac
_regmap_read+0x60/0xd8
regmap_read+0x4c/0x7c
rockchip_pmu_set_idle_request.isra.0+0x98/0x16c
rockchip_pd_power+0x130/0x48c
rockchip_pd_power_off+0x38/0x48
genpd_power_off.isra.0+0x1f0/0x2f0
genpd_power_off_work_fn+0x34/0x54
Test 2: Same debug build, booted with clk_ignore_unused
added to kernel cmdline via U-Boot. Same crash, same domain:
rockchip_pd_power_off: vo pwr_mask=0x0
Internal error: synchronous external abort: 0000000096000010
(identical call trace)
The crash occurs even with clk_ignore_unused. The QoS
registers for PD_VO are inaccessible when genpd attempts
to power off this idle-only domain.
^ permalink raw reply
* Re: [PATCH 1/2] pmdomain/rockchip: skip QoS operations for idle-only domains
From: Shawn Lin @ 2026-04-04 11:40 UTC (permalink / raw)
To: Daniel Bozeman, finley.xiao, ulf.hansson, heiko, linux-pm,
linux-arm-kernel, linux-rockchip, linux-kernel, Jonas Karlman
Cc: shawn.lin
In-Reply-To: <adAwtiaU-32qjRRE@claude-dev>
+ Jonas
在 2026/04/04 星期六 5:27, Daniel Bozeman 写道:
> I ran both tests you requested:
>
> Test 1: Added pr_err to rockchip_pd_power_on/off to identify
> the crashing domain. With patch 2 only (skip EPROBE_DEFER),
> the crash occurs on PD_VO:
Thanks for finding the PD_VO, and I'm still requesting more docs internally
to check what's going on. I see there are several qos nodes under PD_VO,
but I'm not sure if they all belong to PD_VO and even not sure if their
registers are defined correctly.
Perhaps, could you help dig more by removing the qos one by one from
PD_VO to narrow down the broken qos?
I also loop in Jonas, who submitted the code, to have a look. (I'm also
surprised to see there aren't any Qos nodes under PD_VO in vendor
kernel for reference, but upstream code has...)
>
> rockchip_pd_power_off: vo pwr_mask=0x0
> Internal error: synchronous external abort: 0000000096000010
> Workqueue: pm genpd_power_off_work_fn
> Call trace:
> regmap_mmio_read32le+0x8/0x20
> _regmap_bus_reg_read+0x6c/0xac
> _regmap_read+0x60/0xd8
> regmap_read+0x4c/0x7c
> rockchip_pmu_set_idle_request.isra.0+0x98/0x16c
> rockchip_pd_power+0x130/0x48c
> rockchip_pd_power_off+0x38/0x48
> genpd_power_off.isra.0+0x1f0/0x2f0
> genpd_power_off_work_fn+0x34/0x54
>
> Test 2: Same debug build, booted with clk_ignore_unused
> added to kernel cmdline via U-Boot. Same crash, same domain:
>
> rockchip_pd_power_off: vo pwr_mask=0x0
> Internal error: synchronous external abort: 0000000096000010
> (identical call trace)
>
> The crash occurs even with clk_ignore_unused. The QoS
> registers for PD_VO are inaccessible when genpd attempts
> to power off this idle-only domain.
>
^ permalink raw reply
* Re: [PATCH RESEND v1] thermal: core: fix blocking in unregistering zone
From: Rafael J. Wysocki @ 2026-04-04 12:58 UTC (permalink / raw)
To: Guenter Roeck
Cc: Rafael J. Wysocki, Jiajia Liu, Daniel Lezcano, Zhang Rui,
Lukasz Luba, linux-pm, linux-kernel, Armin Wolf, linux-hwmon
In-Reply-To: <ebdbe040-d673-47b9-a6c9-f0fefc0b771b@roeck-us.net>
On Fri, Apr 3, 2026 at 4:20 PM Guenter Roeck <linux@roeck-us.net> wrote:
>
> On 4/3/26 05:52, Rafael J. Wysocki wrote:
> .[ ... ]
> > It appears to work for me, but I'm not sure if having multiple hwmon class
> > devices with the same value in the name attribute is fine.
>
> Like this ?
>
> $ cd /sys/class/hwmon
> $ grep . */name
> hwmon0/name:r8169_0_c00:00
> hwmon1/name:nvme
> hwmon2/name:nvme
> hwmon3/name:nct6687
> hwmon4/name:k10temp
> hwmon5/name:spd5118
> hwmon6/name:spd5118
> hwmon7/name:spd5118
> hwmon8/name:spd5118
> hwmon9/name:mt7921_phy0
Yes.
> Names such as "r8169_0_c00:00" and "mt7921_phy0" are actually overkill
> since the "sensors" command makes it
>
> r8169_0_c00:00-mdio-0
> Adapter: MDIO adapter
> temp1: +36.0°C (high = +120.0°C)
>
> mt7921_phy0-pci-0d00
> Adapter: PCI adapter
> temp1: +30.0°C
>
> essentially duplicating the device index.
Well, with the patch posted by me, the output of sensors from a test
system looks like this:
acpitz-acpi-0
Adapter: ACPI interface
temp1: +16.8°C
pch_cannonlake-virtual-0
Adapter: Virtual device
temp1: +33.0°C
acpitz-acpi-0
Adapter: ACPI interface
temp1: +27.8°C
(some further data excluded), which is kind of confusing (note the
duplicate acpitz-acpi-0 entries with different values of temp1).
That could be disambiguated by concatenating the thermal zone ID
(possibly after a '_') to the name. Or the "temp*" things for thermal
zones of the same type could carry different numbers.
A less attractive alternative would be to register a special virtual
device serving as a parent for all hwmon interfaces registered
automatically for thermal zones.
^ permalink raw reply
* Re: [PATCH] thermal/core: Remove pointless variable when registering a cooling device
From: Rafael J. Wysocki @ 2026-04-04 13:07 UTC (permalink / raw)
To: Lukasz Luba, Daniel Lezcano
Cc: Daniel Lezcano, Zhang Rui, open list:THERMAL, open list
In-Reply-To: <bdd8684b-9ce8-4c7a-b27c-a2d67b29367c@arm.com>
On Thu, Apr 2, 2026 at 1:03 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
>
>
>
> On 4/2/26 09:44, Daniel Lezcano wrote:
> > From: Daniel Lezcano <daniel.lezcano@oss.qualcomm.com>
> >
> > The 'id' variable is set to store the ida_alloc() value which is
> > already stored into cdev->id. It is pointless to use it because
> > cdev->id can be used instead.
> >
> > Signed-off-by: Daniel Lezcano <daniel.lezcano@oss.qualcomm.com>
> > Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
> > ---
> > drivers/thermal/thermal_core.c | 5 ++---
> > 1 file changed, 2 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> > index b7d706ed7ed9..02ce58223f9f 100644
> > --- a/drivers/thermal/thermal_core.c
> > +++ b/drivers/thermal/thermal_core.c
> > @@ -1061,7 +1061,7 @@ __thermal_cooling_device_register(struct device_node *np,
> > {
> > struct thermal_cooling_device *cdev;
> > unsigned long current_state;
> > - int id, ret;
> > + int ret;
> >
> > if (!ops || !ops->get_max_state || !ops->get_cur_state ||
> > !ops->set_cur_state)
> > @@ -1078,7 +1078,6 @@ __thermal_cooling_device_register(struct device_node *np,
> > if (ret < 0)
> > goto out_kfree_cdev;
> > cdev->id = ret;
> > - id = ret;
> >
> > cdev->type = kstrdup_const(type ? type : "", GFP_KERNEL);
> > if (!cdev->type) {
> > @@ -1135,7 +1134,7 @@ __thermal_cooling_device_register(struct device_node *np,
> > out_cdev_type:
> > kfree_const(cdev->type);
> > out_ida_remove:
> > - ida_free(&thermal_cdev_ida, id);
> > + ida_free(&thermal_cdev_ida, cdev->id);
> > out_kfree_cdev:
> > kfree(cdev);
> > return ERR_PTR(ret);
>
>
> Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Applied as 7.1 material, thanks!
^ permalink raw reply
* Re: [PATCH RESEND v1] thermal: core: fix blocking in unregistering zone
From: Guenter Roeck @ 2026-04-04 14:02 UTC (permalink / raw)
To: Rafael J. Wysocki
Cc: Jiajia Liu, Daniel Lezcano, Zhang Rui, Lukasz Luba, linux-pm,
linux-kernel, Armin Wolf, linux-hwmon
In-Reply-To: <CAJZ5v0jZPXC5g3KY+trwmV=nXJu74y=0LaQTHaQ_MnqcfPVAGQ@mail.gmail.com>
On 4/4/26 05:58, Rafael J. Wysocki wrote:
> On Fri, Apr 3, 2026 at 4:20 PM Guenter Roeck <linux@roeck-us.net> wrote:
>>
>> On 4/3/26 05:52, Rafael J. Wysocki wrote:
>> .[ ... ]
>>> It appears to work for me, but I'm not sure if having multiple hwmon class
>>> devices with the same value in the name attribute is fine.
>>
>> Like this ?
>>
>> $ cd /sys/class/hwmon
>> $ grep . */name
>> hwmon0/name:r8169_0_c00:00
>> hwmon1/name:nvme
>> hwmon2/name:nvme
>> hwmon3/name:nct6687
>> hwmon4/name:k10temp
>> hwmon5/name:spd5118
>> hwmon6/name:spd5118
>> hwmon7/name:spd5118
>> hwmon8/name:spd5118
>> hwmon9/name:mt7921_phy0
>
> Yes.
>
>> Names such as "r8169_0_c00:00" and "mt7921_phy0" are actually overkill
>> since the "sensors" command makes it
>>
>> r8169_0_c00:00-mdio-0
>> Adapter: MDIO adapter
>> temp1: +36.0°C (high = +120.0°C)
>>
>> mt7921_phy0-pci-0d00
>> Adapter: PCI adapter
>> temp1: +30.0°C
>>
>> essentially duplicating the device index.
>
> Well, with the patch posted by me, the output of sensors from a test
> system looks like this:
>
> acpitz-acpi-0
> Adapter: ACPI interface
> temp1: +16.8°C
>
> pch_cannonlake-virtual-0
> Adapter: Virtual device
> temp1: +33.0°C
>
> acpitz-acpi-0
> Adapter: ACPI interface
> temp1: +27.8°C
>
> (some further data excluded), which is kind of confusing (note the
> duplicate acpitz-acpi-0 entries with different values of temp1).
>
Yes, agreed, that is confusing. I would have expected the second one
to be identified as "acpitz-acpi-1". Do they both have the same parent ?
> That could be disambiguated by concatenating the thermal zone ID
> (possibly after a '_') to the name. Or the "temp*" things for thermal
> zones of the same type could carry different numbers.
>
> A less attractive alternative would be to register a special virtual
> device serving as a parent for all hwmon interfaces registered
> automatically for thermal zones.
If they all have the same parent, technically it should be a single
hwmon device with multiple sensors, as in:
acpitz-acpi-0
Adapter: ACPI interface
temp1: +16.8°C
temp2: +27.8°C
Guenter
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox