Linux Documentation
 help / color / mirror / Atom feed
* [PATCH v3 3/5] HID: hid-oxp: Add Second Generation Gamepad Mode Switch
From: Derek J. Clark @ 2026-04-12 21:34 UTC (permalink / raw)
  To: Jiri Kosina, Benjamin Tissoires
  Cc: Pierre-Loup A . Griffais, Lambert Fan, Zhouwang Huang,
	Derek J . Clark, linux-input, linux-doc, linux-kernel
In-Reply-To: <20260412213444.2231505-1-derekjohn.clark@gmail.com>

Add a "gamepad_mode" attribute to second-generation OneXPlayer
configuration HID devices. Writing this attribute initiates a mode shift
in the device MCU that puts it into a state where all events are routed
to a hidraw interface instead of the xpad evdev interface. This allows
for debugging the hardware input mapping added in the next patch.

Reviewed-by: Zhouwang Huang <honjow311@gmail.com>
Tested-by: Zhouwang Huang <honjow311@gmail.com>
Signed-off-by: Derek J. Clark <derekjohn.clark@gmail.com>
---
v2:
  - Rename to gamepad_mode & show relevant gamepad modes instead of
    using a debug enable/disable paradigm, to match other drivers.
---
 drivers/hid/hid-oxp.c | 130 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)

diff --git a/drivers/hid/hid-oxp.c b/drivers/hid/hid-oxp.c
index 25214356163e..c62952537d98 100644
--- a/drivers/hid/hid-oxp.c
+++ b/drivers/hid/hid-oxp.c
@@ -33,6 +33,7 @@
 enum oxp_function_index {
 	OXP_FID_GEN1_RGB_SET =		0x07,
 	OXP_FID_GEN1_RGB_REPLY =	0x0f,
+	OXP_FID_GEN2_TOGGLE_MODE =	0xb2,
 	OXP_FID_GEN2_STATUS_EVENT =	0xb8,
 };
 
@@ -41,11 +42,22 @@ static struct oxp_hid_cfg {
 	struct hid_device *hdev;
 	struct mutex cfg_mutex; /*ensure single synchronous output report*/
 	u8 rgb_brightness;
+	u8 gamepad_mode;
 	u8 rgb_effect;
 	u8 rgb_speed;
 	u8 rgb_en;
 } drvdata;
 
+enum oxp_gamepad_mode_index {
+	OXP_GP_MODE_XINPUT = 0x00,
+	OXP_GP_MODE_DEBUG = 0x03,
+};
+
+static const char *const oxp_gamepad_mode_text[] = {
+	[OXP_GP_MODE_XINPUT] = "xinput",
+	[OXP_GP_MODE_DEBUG] = "debug",
+};
+
 enum oxp_feature_en_index {
 	OXP_FEAT_DISABLED,
 	OXP_FEAT_ENABLED,
@@ -181,6 +193,32 @@ static int oxp_hid_raw_event_gen_1(struct hid_device *hdev,
 	return 0;
 }
 
+static int oxp_gen_2_property_out(enum oxp_function_index fid, u8 *data, u8 data_size);
+
+static void oxp_mcu_init_fn(struct work_struct *work)
+{
+	u8 gp_mode_data[3] = { OXP_GP_MODE_DEBUG, 0x01, 0x02 };
+	int ret;
+
+	/* Cycle the gamepad mode */
+	ret = oxp_gen_2_property_out(OXP_FID_GEN2_TOGGLE_MODE, gp_mode_data, 3);
+	if (ret)
+		dev_err(&drvdata.hdev->dev,
+			"Error: Failed to set gamepad mode: %i\n", ret);
+
+	/* Remainder only applies for xinput mode */
+	if (drvdata.gamepad_mode == OXP_GP_MODE_DEBUG)
+		return;
+
+	gp_mode_data[0] = OXP_GP_MODE_XINPUT;
+	ret = oxp_gen_2_property_out(OXP_FID_GEN2_TOGGLE_MODE, gp_mode_data, 3);
+	if (ret)
+		dev_err(&drvdata.hdev->dev,
+			"Error: Failed to set gamepad mode: %i\n", ret);
+}
+
+static DECLARE_DELAYED_WORK(oxp_mcu_init, oxp_mcu_init_fn);
+
 static int oxp_hid_raw_event_gen_2(struct hid_device *hdev,
 				   struct hid_report *report, u8 *data,
 				   int size)
@@ -191,6 +229,14 @@ static int oxp_hid_raw_event_gen_2(struct hid_device *hdev,
 	if (data[0] != OXP_FID_GEN2_STATUS_EVENT)
 		return 0;
 
+	/* Sent ~6s after resume event, indicating the MCU has fully reset.
+	 * Re-apply our settings after this has been received.
+	 */
+	if (data[3] == OXP_EFFECT_MONO_TRUE) {
+		mod_delayed_work(system_wq, &oxp_mcu_init, msecs_to_jiffies(50));
+		return 0;
+	}
+
 	if (data[3] != OXP_GET_PROPERTY)
 		return 0;
 
@@ -288,6 +334,77 @@ static int oxp_gen_2_property_out(enum oxp_function_index fid, u8 *data,
 				footer_size);
 }
 
+static ssize_t gamepad_mode_store(struct device *dev,
+				  struct device_attribute *attr, const char *buf,
+				  size_t count)
+{
+	u16 up = get_usage_page(drvdata.hdev);
+	u8 data[3] = { 0x00, 0x01, 0x02 };
+	int ret = -EINVAL;
+	int i;
+
+	if (up != GEN2_USAGE_PAGE)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(oxp_gamepad_mode_text); i++) {
+		if (oxp_gamepad_mode_text[i] && sysfs_streq(buf, oxp_gamepad_mode_text[i])) {
+			ret = i;
+			break;
+		}
+	}
+	if (ret < 0)
+		return ret;
+
+	data[0] = ret;
+
+	ret = oxp_gen_2_property_out(OXP_FID_GEN2_TOGGLE_MODE, data, 3);
+	if (ret)
+		return ret;
+
+	drvdata.gamepad_mode = data[0];
+
+	return count;
+}
+
+static ssize_t gamepad_mode_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%s\n", oxp_gamepad_mode_text[drvdata.gamepad_mode]);
+}
+static DEVICE_ATTR_RW(gamepad_mode);
+
+static ssize_t gamepad_mode_index_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	ssize_t count = 0;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(oxp_gamepad_mode_text); i++) {
+		if (!oxp_gamepad_mode_text[i] ||
+		    oxp_gamepad_mode_text[i][0] == '\0')
+			continue;
+
+		count += sysfs_emit_at(buf, count, "%s ", oxp_gamepad_mode_text[i]);
+	}
+
+	if (count)
+		buf[count - 1] = '\n';
+
+	return count;
+}
+static DEVICE_ATTR_RO(gamepad_mode_index);
+
+static struct attribute *oxp_cfg_attrs[] = {
+	&dev_attr_gamepad_mode.attr,
+	&dev_attr_gamepad_mode_index.attr,
+	NULL,
+};
+
+static const struct attribute_group oxp_cfg_attrs_group = {
+	.attrs = oxp_cfg_attrs,
+};
+
 static int oxp_rgb_status_store(u8 enabled, u8 speed, u8 brightness)
 {
 	u16 up = get_usage_page(drvdata.hdev);
@@ -733,7 +850,20 @@ static int oxp_cfg_probe(struct hid_device *hdev, u16 up)
 		dev_warn(drvdata.led_mc->led_cdev.dev,
 			 "Failed to query RGB initial state: %i\n", ret);
 
+	/* Below features are only implemented in gen 2 */
+	if (up != GEN2_USAGE_PAGE)
+		return 0;
+
 skip_rgb:
+	mod_delayed_work(system_wq, &oxp_mcu_init, msecs_to_jiffies(50));
+
+	drvdata.gamepad_mode = OXP_GP_MODE_XINPUT;
+
+	ret = devm_device_add_group(&hdev->dev, &oxp_cfg_attrs_group);
+	if (ret)
+		return dev_err_probe(&hdev->dev, ret,
+				     "Failed to attach configuration attributes\n");
+
 	return 0;
 }
 
-- 
2.53.0


^ permalink raw reply related

* [PATCH v3 5/5] HID: hid-oxp: Add Vibration Intensity Attribute
From: Derek J. Clark @ 2026-04-12 21:34 UTC (permalink / raw)
  To: Jiri Kosina, Benjamin Tissoires
  Cc: Pierre-Loup A . Griffais, Lambert Fan, Zhouwang Huang,
	Derek J . Clark, linux-input, linux-doc, linux-kernel
In-Reply-To: <20260412213444.2231505-1-derekjohn.clark@gmail.com>

Add an attribute for setting the rumble intensity level. Switching the
gamepad mode resets the intensity to its default value, so the setting
must be re-applied after the gamepad mode is changed.

Reviewed-by: Zhouwang Huang <honjow311@gmail.com>
Tested-by: Zhouwang Huang <honjow311@gmail.com>
Signed-off-by: Derek J. Clark <derekjohn.clark@gmail.com>
---
 drivers/hid/hid-oxp.c | 78 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/drivers/hid/hid-oxp.c b/drivers/hid/hid-oxp.c
index 959ec1a90d22..a4e9d41bd3a7 100644
--- a/drivers/hid/hid-oxp.c
+++ b/drivers/hid/hid-oxp.c
@@ -34,6 +34,7 @@ enum oxp_function_index {
 	OXP_FID_GEN1_RGB_SET =		0x07,
 	OXP_FID_GEN1_RGB_REPLY =	0x0f,
 	OXP_FID_GEN2_TOGGLE_MODE =	0xb2,
+	OXP_FID_GEN2_RUMBLE_SET =	0xb3,
 	OXP_FID_GEN2_KEY_STATE =	0xb4,
 	OXP_FID_GEN2_STATUS_EVENT =	0xb8,
 };
@@ -178,6 +179,7 @@ static struct oxp_hid_cfg {
 	struct mutex cfg_mutex; /*ensure single synchronous output report*/
 	u8 rgb_brightness;
 	u8 gamepad_mode;
+	u8 rumble_intensity;
 	u8 rgb_effect;
 	u8 rgb_speed;
 	u8 rgb_en;
@@ -263,6 +265,11 @@ static const char *const oxp_rgb_effect_text[] = {
 	[OXP_EFFECT_MONO_LIST] = "monocolor",
 };
 
+enum oxp_rumble_side_index {
+	OXP_RUMBLE_LEFT = 0x00,
+	OXP_RUMBLE_RIGHT,
+};
+
 struct oxp_gen_1_rgb_report {
 	u8 report_id;
 	u8 message_id;
@@ -338,6 +345,7 @@ static int oxp_hid_raw_event_gen_1(struct hid_device *hdev,
 
 static int oxp_gen_2_property_out(enum oxp_function_index fid, u8 *data, u8 data_size);
 static int oxp_set_buttons(void);
+static int oxp_rumble_intensity_set(u8 intensity);
 
 static void oxp_mcu_init_fn(struct work_struct *work)
 {
@@ -365,6 +373,12 @@ static void oxp_mcu_init_fn(struct work_struct *work)
 	if (ret)
 		dev_err(&drvdata.hdev->dev,
 			"Error: Failed to set gamepad mode: %i\n", ret);
+
+	/* Set vibration level */
+	ret = oxp_rumble_intensity_set(drvdata.rumble_intensity);
+	if (ret)
+		dev_err(&drvdata.hdev->dev,
+			"Error: Failed to set rumble intensity: %i\n", ret);
 }
 
 static DECLARE_DELAYED_WORK(oxp_mcu_init, oxp_mcu_init_fn);
@@ -513,6 +527,14 @@ static ssize_t gamepad_mode_store(struct device *dev,
 
 	drvdata.gamepad_mode = data[0];
 
+	if (drvdata.gamepad_mode == OXP_GP_MODE_DEBUG)
+		return count;
+
+	/* Re-apply rumble settings as switching gamepad mode will override */
+	ret = oxp_rumble_intensity_set(drvdata.rumble_intensity);
+	if (ret)
+		return ret;
+
 	return count;
 }
 
@@ -858,6 +880,59 @@ static ssize_t button_mapping_options_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(button_mapping_options);
 
+static int oxp_rumble_intensity_set(u8 intensity)
+{
+	u8 header[15] = { 0x02, 0x38, 0x02, 0xe3, 0x39, 0xe3, 0x39, 0xe3,
+			  0x39, 0x01, intensity, 0x05, 0xe3, 0x39, 0xe3 };
+	u8 footer[9] = { 0x39, 0xe3, 0x39, 0xe3, 0xe3, 0x02, 0x04, 0x39, 0x39 };
+	size_t footer_size = ARRAY_SIZE(footer);
+	size_t header_size = ARRAY_SIZE(header);
+	u8 data[59] = { 0x0 };
+	size_t data_size = ARRAY_SIZE(data);
+
+	memcpy(data, header, header_size);
+	memcpy(data + data_size - footer_size, footer, footer_size);
+
+	return oxp_gen_2_property_out(OXP_FID_GEN2_RUMBLE_SET, data, data_size);
+}
+
+static ssize_t rumble_intensity_store(struct device *dev,
+				      struct device_attribute *attr, const char *buf,
+				      size_t count)
+{
+	int ret;
+	u8 val;
+
+	ret = kstrtou8(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val < 0 || val > 5)
+		return -EINVAL;
+
+	ret = oxp_rumble_intensity_set(val);
+	if (ret)
+		return ret;
+
+	drvdata.rumble_intensity = val;
+
+	return count;
+}
+
+static ssize_t rumble_intensity_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%i\n", drvdata.rumble_intensity);
+}
+static DEVICE_ATTR_RW(rumble_intensity);
+
+static ssize_t rumble_intensity_range_show(struct device *dev,
+					   struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "0-5\n");
+}
+static DEVICE_ATTR_RO(rumble_intensity_range);
+
 #define OXP_DEVICE_ATTR_RW(_name, _group)                                     \
 	static ssize_t _name##_store(struct device *dev,                      \
 				     struct device_attribute *attr,           \
@@ -949,6 +1024,8 @@ static struct attribute *oxp_cfg_attrs[] = {
 	&dev_attr_gamepad_mode.attr,
 	&dev_attr_gamepad_mode_index.attr,
 	&dev_attr_reset_buttons.attr,
+	&dev_attr_rumble_intensity.attr,
+	&dev_attr_rumble_intensity_range.attr,
 	NULL,
 };
 
@@ -1422,6 +1499,7 @@ static int oxp_cfg_probe(struct hid_device *hdev, u16 up)
 	drvdata.bmap_2 = bmap_2;
 	oxp_reset_buttons();
 	drvdata.gamepad_mode = OXP_GP_MODE_XINPUT;
+	drvdata.rumble_intensity = 5;
 	mod_delayed_work(system_wq, &oxp_mcu_init, msecs_to_jiffies(50));
 
 	ret = devm_device_add_group(&hdev->dev, &oxp_cfg_attrs_group);
-- 
2.53.0


^ permalink raw reply related

* [PATCH v3 4/5] HID: hid-oxp: Add Button Mapping Interface
From: Derek J. Clark @ 2026-04-12 21:34 UTC (permalink / raw)
  To: Jiri Kosina, Benjamin Tissoires
  Cc: Pierre-Loup A . Griffais, Lambert Fan, Zhouwang Huang,
	Derek J . Clark, linux-input, linux-doc, linux-kernel
In-Reply-To: <20260412213444.2231505-1-derekjohn.clark@gmail.com>

Add a button mapping interface for second-generation OneXPlayer
configuration HID interfaces. This interface allows the MCU to swap
button mappings at the hardware level. The current state cannot be
retrieved, and the mappings may previously have been modified in
Windows, so we reset the button mapping at init and expose an attribute
to allow userspace to do this again at any time.

The interface requires two pages of button mapping data to be sent
before the settings take effect. Since the MCU requires a 200ms delay
after each message (400ms in total for these attributes), use the same
debounced work queue method we used for RGB. This allows userspace or
udev rules to rapidly map all buttons. The values are cached until the
final write is sent to the device.

Reviewed-by: Zhouwang Huang <honjow311@gmail.com>
Tested-by: Zhouwang Huang <honjow311@gmail.com>
Signed-off-by: Derek J. Clark <derekjohn.clark@gmail.com>
---
v3:
  - Ensure the default button map is properly initialized during probe.
v2:
  - Add detection of post-suspend MCU init to trigger setting the button
    map again.
---
 drivers/hid/hid-oxp.c | 569 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 567 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/hid-oxp.c b/drivers/hid/hid-oxp.c
index c62952537d98..959ec1a90d22 100644
--- a/drivers/hid/hid-oxp.c
+++ b/drivers/hid/hid-oxp.c
@@ -34,10 +34,145 @@ enum oxp_function_index {
 	OXP_FID_GEN1_RGB_SET =		0x07,
 	OXP_FID_GEN1_RGB_REPLY =	0x0f,
 	OXP_FID_GEN2_TOGGLE_MODE =	0xb2,
+	OXP_FID_GEN2_KEY_STATE =	0xb4,
 	OXP_FID_GEN2_STATUS_EVENT =	0xb8,
 };
 
+#define OXP_MAPPING_GAMEPAD	0x01
+#define OXP_MAPPING_KEYBOARD	0x02
+
+struct oxp_button_data {
+	u8 mode;
+	u8 index;
+	u8 key_id;
+	u8 padding[2];
+} __packed;
+
+struct oxp_button_entry {
+	struct oxp_button_data data;
+	const char *name;
+};
+
+static const struct oxp_button_entry oxp_button_table[] = {
+	/* Gamepad Buttons */
+	{ { OXP_MAPPING_GAMEPAD, 0x01 }, "BTN_A" },
+	{ { OXP_MAPPING_GAMEPAD, 0x02 }, "BTN_B" },
+	{ { OXP_MAPPING_GAMEPAD, 0x03 }, "BTN_X" },
+	{ { OXP_MAPPING_GAMEPAD, 0x04 }, "BTN_Y" },
+	{ { OXP_MAPPING_GAMEPAD, 0x05 }, "BTN_LB" },
+	{ { OXP_MAPPING_GAMEPAD, 0x06 }, "BTN_RB" },
+	{ { OXP_MAPPING_GAMEPAD, 0x07 }, "BTN_LT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x08 }, "BTN_RT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x09 }, "BTN_START" },
+	{ { OXP_MAPPING_GAMEPAD, 0x0a }, "BTN_SELECT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x0b }, "BTN_L3" },
+	{ { OXP_MAPPING_GAMEPAD, 0x0c }, "BTN_R3" },
+	{ { OXP_MAPPING_GAMEPAD, 0x0d }, "DPAD_UP" },
+	{ { OXP_MAPPING_GAMEPAD, 0x0e }, "DPAD_DOWN" },
+	{ { OXP_MAPPING_GAMEPAD, 0x0f }, "DPAD_LEFT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x10 }, "DPAD_RIGHT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x11 }, "JOY_L_UP" },
+	{ { OXP_MAPPING_GAMEPAD, 0x12 }, "JOY_L_UP_RIGHT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x13 }, "JOY_L_RIGHT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x14 }, "JOY_L_DOWN_RIGHT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x15 }, "JOY_L_DOWN" },
+	{ { OXP_MAPPING_GAMEPAD, 0x16 }, "JOY_L_DOWN_LEFT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x17 }, "JOY_L_LEFT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x18 }, "JOY_L_UP_LEFT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x19 }, "JOY_R_UP" },
+	{ { OXP_MAPPING_GAMEPAD, 0x1a }, "JOY_R_UP_RIGHT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x1b }, "JOY_R_RIGHT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x1c }, "JOY_R_DOWN_RIGHT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x1d }, "JOY_R_DOWN" },
+	{ { OXP_MAPPING_GAMEPAD, 0x1e }, "JOY_R_DOWN_LEFT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x1f }, "JOY_R_LEFT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x20 }, "JOY_R_UP_LEFT" },
+	{ { OXP_MAPPING_GAMEPAD, 0x22 }, "BTN_GUIDE" },
+	/* Keyboard Keys */
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x5a }, "KEY_F1" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x5b }, "KEY_F2" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x5c }, "KEY_F3" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x5d }, "KEY_F4" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x5e }, "KEY_F5" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x5f }, "KEY_F6" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x60 }, "KEY_F7" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x61 }, "KEY_F8" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x62 }, "KEY_F9" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x63 }, "KEY_F10" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x64 }, "KEY_F11" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x65 }, "KEY_F12" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x66 }, "KEY_F13" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x67 }, "KEY_F14" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x68 }, "KEY_F15" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x69 }, "KEY_F16" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x6a }, "KEY_F17" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x6b }, "KEY_F18" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x6c }, "KEY_F19" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x6d }, "KEY_F20" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x6e }, "KEY_F21" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x6f }, "KEY_F22" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x70 }, "KEY_F23" },
+	{ { OXP_MAPPING_KEYBOARD, 0x01, 0x71 }, "KEY_F24" },
+};
+
+enum oxp_joybutton_index {
+	BUTTON_A =	0x01,
+	BUTTON_B,
+	BUTTON_X,
+	BUTTON_Y,
+	BUTTON_LB,
+	BUTTON_RB,
+	BUTTON_LT,
+	BUTTON_RT,
+	BUTTON_START,
+	BUTTON_SELECT,
+	BUTTON_L3,
+	BUTTON_R3,
+	BUTTON_DUP,
+	BUTTON_DDOWN,
+	BUTTON_DLEFT,
+	BUTTON_DRIGHT,
+	BUTTON_M1 =	0x22,
+	BUTTON_M2,
+	/* These are unused currently, reserved for future devices */
+	BUTTON_M3,
+	BUTTON_M4,
+	BUTTON_M5,
+	BUTTON_M6,
+};
+
+struct oxp_button_idx {
+	enum oxp_joybutton_index button_idx;
+	u8 mapping_idx;
+} __packed;
+
+struct oxp_bmap_page_1 {
+	struct oxp_button_idx btn_a;
+	struct oxp_button_idx btn_b;
+	struct oxp_button_idx btn_x;
+	struct oxp_button_idx btn_y;
+	struct oxp_button_idx btn_lb;
+	struct oxp_button_idx btn_rb;
+	struct oxp_button_idx btn_lt;
+	struct oxp_button_idx btn_rt;
+	struct oxp_button_idx btn_start;
+} __packed;
+
+struct oxp_bmap_page_2 {
+	struct oxp_button_idx btn_select;
+	struct oxp_button_idx btn_l3;
+	struct oxp_button_idx btn_r3;
+	struct oxp_button_idx btn_dup;
+	struct oxp_button_idx btn_ddown;
+	struct oxp_button_idx btn_dleft;
+	struct oxp_button_idx btn_dright;
+	struct oxp_button_idx btn_m1;
+	struct oxp_button_idx btn_m2;
+} __packed;
+
 static struct oxp_hid_cfg {
+	struct oxp_bmap_page_1 *bmap_1;
+	struct oxp_bmap_page_2 *bmap_2;
 	struct led_classdev_mc *led_mc;
 	struct hid_device *hdev;
 	struct mutex cfg_mutex; /*ensure single synchronous output report*/
@@ -48,6 +183,10 @@ static struct oxp_hid_cfg {
 	u8 rgb_en;
 } drvdata;
 
+#define OXP_FILL_PAGE_SLOT(page, btn)            \
+	{ .button_idx = (page)->btn.button_idx,  \
+	  .mapping_idx = (page)->btn.mapping_idx }
+
 enum oxp_gamepad_mode_index {
 	OXP_GP_MODE_XINPUT = 0x00,
 	OXP_GP_MODE_DEBUG = 0x03,
@@ -153,6 +292,10 @@ struct oxp_gen_2_rgb_report {
 	u8 effect;
 } __packed;
 
+struct oxp_attr {
+	u8 index;
+};
+
 static u16 get_usage_page(struct hid_device *hdev)
 {
 	return hdev->collection[0].usage >> 16;
@@ -194,12 +337,19 @@ static int oxp_hid_raw_event_gen_1(struct hid_device *hdev,
 }
 
 static int oxp_gen_2_property_out(enum oxp_function_index fid, u8 *data, u8 data_size);
+static int oxp_set_buttons(void);
 
 static void oxp_mcu_init_fn(struct work_struct *work)
 {
 	u8 gp_mode_data[3] = { OXP_GP_MODE_DEBUG, 0x01, 0x02 };
 	int ret;
 
+	/* Re-apply the button mapping */
+	ret = oxp_set_buttons();
+	if (ret)
+		dev_err(&drvdata.hdev->dev,
+			"Error: Failed to set button mapping: %i\n", ret);
+
 	/* Cycle the gamepad mode */
 	ret = oxp_gen_2_property_out(OXP_FID_GEN2_TOGGLE_MODE, gp_mode_data, 3);
 	if (ret)
@@ -395,9 +545,410 @@ static ssize_t gamepad_mode_index_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(gamepad_mode_index);
 
+static void oxp_set_defaults_bmap_1(struct oxp_bmap_page_1 *bmap)
+{
+	bmap->btn_a.button_idx = BUTTON_A;
+	bmap->btn_a.mapping_idx = 0;
+	bmap->btn_b.button_idx = BUTTON_B;
+	bmap->btn_b.mapping_idx = 1;
+	bmap->btn_x.button_idx = BUTTON_X;
+	bmap->btn_x.mapping_idx = 2;
+	bmap->btn_y.button_idx = BUTTON_Y;
+	bmap->btn_y.mapping_idx = 3;
+	bmap->btn_lb.button_idx = BUTTON_LB;
+	bmap->btn_lb.mapping_idx = 4;
+	bmap->btn_rb.button_idx = BUTTON_RB;
+	bmap->btn_rb.mapping_idx = 5;
+	bmap->btn_lt.button_idx = BUTTON_LT;
+	bmap->btn_lt.mapping_idx = 6;
+	bmap->btn_rt.button_idx = BUTTON_RT;
+	bmap->btn_rt.mapping_idx = 7;
+	bmap->btn_start.button_idx = BUTTON_START;
+	bmap->btn_start.mapping_idx = 8;
+}
+
+static void oxp_set_defaults_bmap_2(struct oxp_bmap_page_2 *bmap)
+{
+	bmap->btn_select.button_idx = BUTTON_SELECT;
+	bmap->btn_select.mapping_idx = 9;
+	bmap->btn_l3.button_idx = BUTTON_L3;
+	bmap->btn_l3.mapping_idx = 10;
+	bmap->btn_r3.button_idx = BUTTON_R3;
+	bmap->btn_r3.mapping_idx = 11;
+	bmap->btn_dup.button_idx = BUTTON_DUP;
+	bmap->btn_dup.mapping_idx = 12;
+	bmap->btn_ddown.button_idx = BUTTON_DDOWN;
+	bmap->btn_ddown.mapping_idx = 13;
+	bmap->btn_dleft.button_idx = BUTTON_DLEFT;
+	bmap->btn_dleft.mapping_idx = 14;
+	bmap->btn_dright.button_idx = BUTTON_DRIGHT;
+	bmap->btn_dright.mapping_idx = 15;
+	bmap->btn_m1.button_idx = BUTTON_M1;
+	bmap->btn_m1.mapping_idx = 48; /* KEY_F15 */
+	bmap->btn_m2.button_idx = BUTTON_M2;
+	bmap->btn_m2.mapping_idx = 49; /* KEY_F16 */
+}
+
+static void oxp_page_fill_data(char *buf, const struct oxp_button_idx *buttons,
+			       size_t len)
+{
+	size_t offset_increment = sizeof(u8) + sizeof(struct oxp_button_idx);
+	size_t offset = 5;
+	unsigned int i;
+
+	for (i = 0; i < len; i++, offset += offset_increment) {
+		buf[offset] = (u8)buttons[i].button_idx;
+		memcpy(buf + offset + 1,
+		       &oxp_button_table[buttons[i].mapping_idx].data,
+		       sizeof(struct oxp_button_data));
+	}
+}
+
+static int oxp_set_buttons(void)
+{
+	u8 page_1[59] = { 0x02, 0x38, 0x20, 0x01, 0x01 };
+	u8 page_2[59] = { 0x02, 0x38, 0x20, 0x02, 0x01 };
+	u16 up = get_usage_page(drvdata.hdev);
+	int ret;
+
+	if (up != GEN2_USAGE_PAGE)
+		return -EINVAL;
+
+	const struct oxp_button_idx p1[] = {
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_1, btn_a),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_1, btn_b),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_1, btn_x),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_1, btn_y),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_1, btn_lb),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_1, btn_rb),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_1, btn_lt),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_1, btn_rt),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_1, btn_start),
+	};
+
+	const struct oxp_button_idx p2[] = {
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_2, btn_select),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_2, btn_l3),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_2, btn_r3),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_2, btn_dup),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_2, btn_ddown),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_2, btn_dleft),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_2, btn_dright),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_2, btn_m1),
+		OXP_FILL_PAGE_SLOT(drvdata.bmap_2, btn_m2),
+	};
+
+	oxp_page_fill_data(page_1, p1, ARRAY_SIZE(p1));
+	oxp_page_fill_data(page_2, p2, ARRAY_SIZE(p2));
+
+	ret = oxp_gen_2_property_out(OXP_FID_GEN2_KEY_STATE, page_1, ARRAY_SIZE(page_1));
+	if (ret)
+		return ret;
+
+	return oxp_gen_2_property_out(OXP_FID_GEN2_KEY_STATE, page_2, ARRAY_SIZE(page_2));
+}
+
+static void oxp_reset_buttons(void)
+{
+	oxp_set_defaults_bmap_1(drvdata.bmap_1);
+	oxp_set_defaults_bmap_2(drvdata.bmap_2);
+}
+
+static ssize_t reset_buttons_store(struct device *dev,
+				   struct device_attribute *attr, const char *buf,
+				   size_t count)
+{
+	int val, ret;
+
+	ret = kstrtoint(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val != 1)
+		return -EINVAL;
+
+	oxp_reset_buttons();
+	ret = oxp_set_buttons();
+	if (ret)
+		return ret;
+
+	return count;
+}
+static DEVICE_ATTR_WO(reset_buttons);
+
+static void oxp_btn_queue_fn(struct work_struct *work)
+{
+	int ret;
+
+	ret = oxp_set_buttons();
+	if (ret)
+		dev_err(&drvdata.hdev->dev,
+			"Error: Failed to write button mapping: %i\n", ret);
+}
+
+static DECLARE_DELAYED_WORK(oxp_btn_queue, oxp_btn_queue_fn);
+
+static int oxp_button_idx_from_str(const char *buf)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(oxp_button_table); i++)
+		if (sysfs_streq(buf, oxp_button_table[i].name))
+			return i;
+
+	return -EINVAL;
+}
+
+static ssize_t map_button_store(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count, u8 index)
+{
+	int idx;
+
+	idx = oxp_button_idx_from_str(buf);
+	if (idx < 0)
+		return idx;
+
+	switch (index) {
+	case BUTTON_A:
+		drvdata.bmap_1->btn_a.mapping_idx = idx;
+		break;
+	case BUTTON_B:
+		drvdata.bmap_1->btn_b.mapping_idx = idx;
+		break;
+	case BUTTON_X:
+		drvdata.bmap_1->btn_x.mapping_idx = idx;
+		break;
+	case BUTTON_Y:
+		drvdata.bmap_1->btn_y.mapping_idx = idx;
+		break;
+	case BUTTON_LB:
+		drvdata.bmap_1->btn_lb.mapping_idx = idx;
+		break;
+	case BUTTON_RB:
+		drvdata.bmap_1->btn_rb.mapping_idx = idx;
+		break;
+	case BUTTON_LT:
+		drvdata.bmap_1->btn_lt.mapping_idx = idx;
+		break;
+	case BUTTON_RT:
+		drvdata.bmap_1->btn_rt.mapping_idx = idx;
+		break;
+	case BUTTON_START:
+		drvdata.bmap_1->btn_start.mapping_idx = idx;
+		break;
+	case BUTTON_SELECT:
+		drvdata.bmap_2->btn_select.mapping_idx = idx;
+		break;
+	case BUTTON_L3:
+		drvdata.bmap_2->btn_l3.mapping_idx = idx;
+		break;
+	case BUTTON_R3:
+		drvdata.bmap_2->btn_r3.mapping_idx = idx;
+		break;
+	case BUTTON_DUP:
+		drvdata.bmap_2->btn_dup.mapping_idx = idx;
+		break;
+	case BUTTON_DDOWN:
+		drvdata.bmap_2->btn_ddown.mapping_idx = idx;
+		break;
+	case BUTTON_DLEFT:
+		drvdata.bmap_2->btn_dleft.mapping_idx = idx;
+		break;
+	case BUTTON_DRIGHT:
+		drvdata.bmap_2->btn_dright.mapping_idx = idx;
+		break;
+	case BUTTON_M1:
+		drvdata.bmap_2->btn_m1.mapping_idx = idx;
+		break;
+	case BUTTON_M2:
+		drvdata.bmap_2->btn_m2.mapping_idx = idx;
+		break;
+	default:
+		return -EINVAL;
+	}
+	mod_delayed_work(system_wq, &oxp_btn_queue, msecs_to_jiffies(50));
+	return count;
+}
+
+static ssize_t map_button_show(struct device *dev,
+			       struct device_attribute *attr, char *buf,
+			       u8 index)
+{
+	u8 i;
+
+	switch (index) {
+	case BUTTON_A:
+		i = drvdata.bmap_1->btn_a.mapping_idx;
+		break;
+	case BUTTON_B:
+		i = drvdata.bmap_1->btn_b.mapping_idx;
+		break;
+	case BUTTON_X:
+		i = drvdata.bmap_1->btn_x.mapping_idx;
+		break;
+	case BUTTON_Y:
+		i = drvdata.bmap_1->btn_y.mapping_idx;
+		break;
+	case BUTTON_LB:
+		i = drvdata.bmap_1->btn_lb.mapping_idx;
+		break;
+	case BUTTON_RB:
+		i = drvdata.bmap_1->btn_rb.mapping_idx;
+		break;
+	case BUTTON_LT:
+		i = drvdata.bmap_1->btn_lt.mapping_idx;
+		break;
+	case BUTTON_RT:
+		i = drvdata.bmap_1->btn_rt.mapping_idx;
+		break;
+	case BUTTON_START:
+		i = drvdata.bmap_1->btn_start.mapping_idx;
+		break;
+	case BUTTON_SELECT:
+		i = drvdata.bmap_2->btn_select.mapping_idx;
+		break;
+	case BUTTON_L3:
+		i = drvdata.bmap_2->btn_l3.mapping_idx;
+		break;
+	case BUTTON_R3:
+		i = drvdata.bmap_2->btn_r3.mapping_idx;
+		break;
+	case BUTTON_DUP:
+		i = drvdata.bmap_2->btn_dup.mapping_idx;
+		break;
+	case BUTTON_DDOWN:
+		i = drvdata.bmap_2->btn_ddown.mapping_idx;
+		break;
+	case BUTTON_DLEFT:
+		i = drvdata.bmap_2->btn_dleft.mapping_idx;
+		break;
+	case BUTTON_DRIGHT:
+		i = drvdata.bmap_2->btn_dright.mapping_idx;
+		break;
+	case BUTTON_M1:
+		i = drvdata.bmap_2->btn_m1.mapping_idx;
+		break;
+	case BUTTON_M2:
+		i = drvdata.bmap_2->btn_m2.mapping_idx;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (i >= ARRAY_SIZE(oxp_button_table))
+		return -EINVAL;
+
+	return sysfs_emit(buf, "%s\n", oxp_button_table[i].name);
+}
+
+static ssize_t button_mapping_options_show(struct device *dev,
+					   struct device_attribute *attr, char *buf)
+{
+	ssize_t count = 0;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(oxp_button_table); i++)
+		count += sysfs_emit_at(buf, count, "%s ", oxp_button_table[i].name);
+
+	if (count)
+		buf[count - 1] = '\n';
+
+	return count;
+}
+static DEVICE_ATTR_RO(button_mapping_options);
+
+#define OXP_DEVICE_ATTR_RW(_name, _group)                                     \
+	static ssize_t _name##_store(struct device *dev,                      \
+				     struct device_attribute *attr,           \
+				     const char *buf, size_t count)           \
+	{                                                                     \
+		return _group##_store(dev, attr, buf, count, _name.index);    \
+	}                                                                     \
+	static ssize_t _name##_show(struct device *dev,                       \
+				    struct device_attribute *attr, char *buf) \
+	{                                                                     \
+		return _group##_show(dev, attr, buf, _name.index);            \
+	}                                                                     \
+	static DEVICE_ATTR_RW(_name)
+
+static struct oxp_attr button_a = { BUTTON_A };
+OXP_DEVICE_ATTR_RW(button_a, map_button);
+
+static struct oxp_attr button_b = { BUTTON_B };
+OXP_DEVICE_ATTR_RW(button_b, map_button);
+
+static struct oxp_attr button_x = { BUTTON_X };
+OXP_DEVICE_ATTR_RW(button_x, map_button);
+
+static struct oxp_attr button_y = { BUTTON_Y };
+OXP_DEVICE_ATTR_RW(button_y, map_button);
+
+static struct oxp_attr button_lb = { BUTTON_LB };
+OXP_DEVICE_ATTR_RW(button_lb, map_button);
+
+static struct oxp_attr button_rb = { BUTTON_RB };
+OXP_DEVICE_ATTR_RW(button_rb, map_button);
+
+static struct oxp_attr button_lt = { BUTTON_LT };
+OXP_DEVICE_ATTR_RW(button_lt, map_button);
+
+static struct oxp_attr button_rt = { BUTTON_RT };
+OXP_DEVICE_ATTR_RW(button_rt, map_button);
+
+static struct oxp_attr button_start = { BUTTON_START };
+OXP_DEVICE_ATTR_RW(button_start, map_button);
+
+static struct oxp_attr button_select = { BUTTON_SELECT };
+OXP_DEVICE_ATTR_RW(button_select, map_button);
+
+static struct oxp_attr button_l3 = { BUTTON_L3 };
+OXP_DEVICE_ATTR_RW(button_l3, map_button);
+
+static struct oxp_attr button_r3 = { BUTTON_R3 };
+OXP_DEVICE_ATTR_RW(button_r3, map_button);
+
+static struct oxp_attr button_d_up = { BUTTON_DUP };
+OXP_DEVICE_ATTR_RW(button_d_up, map_button);
+
+static struct oxp_attr button_d_down = { BUTTON_DDOWN };
+OXP_DEVICE_ATTR_RW(button_d_down, map_button);
+
+static struct oxp_attr button_d_left = { BUTTON_DLEFT };
+OXP_DEVICE_ATTR_RW(button_d_left, map_button);
+
+static struct oxp_attr button_d_right = { BUTTON_DRIGHT };
+OXP_DEVICE_ATTR_RW(button_d_right, map_button);
+
+static struct oxp_attr button_m1 = { BUTTON_M1 };
+OXP_DEVICE_ATTR_RW(button_m1, map_button);
+
+static struct oxp_attr button_m2 = { BUTTON_M2 };
+OXP_DEVICE_ATTR_RW(button_m2, map_button);
+
 static struct attribute *oxp_cfg_attrs[] = {
+	&dev_attr_button_a.attr,
+	&dev_attr_button_b.attr,
+	&dev_attr_button_d_down.attr,
+	&dev_attr_button_d_left.attr,
+	&dev_attr_button_d_right.attr,
+	&dev_attr_button_d_up.attr,
+	&dev_attr_button_l3.attr,
+	&dev_attr_button_lb.attr,
+	&dev_attr_button_lt.attr,
+	&dev_attr_button_m1.attr,
+	&dev_attr_button_m2.attr,
+	&dev_attr_button_mapping_options.attr,
+	&dev_attr_button_r3.attr,
+	&dev_attr_button_rb.attr,
+	&dev_attr_button_rt.attr,
+	&dev_attr_button_select.attr,
+	&dev_attr_button_start.attr,
+	&dev_attr_button_x.attr,
+	&dev_attr_button_y.attr,
 	&dev_attr_gamepad_mode.attr,
 	&dev_attr_gamepad_mode_index.attr,
+	&dev_attr_reset_buttons.attr,
 	NULL,
 };
 
@@ -823,6 +1374,8 @@ static bool oxp_hybrid_mcu_device(void)
 
 static int oxp_cfg_probe(struct hid_device *hdev, u16 up)
 {
+	struct oxp_bmap_page_1 *bmap_1;
+	struct oxp_bmap_page_2 *bmap_2;
 	int ret;
 
 	hid_set_drvdata(hdev, &drvdata);
@@ -855,9 +1408,21 @@ static int oxp_cfg_probe(struct hid_device *hdev, u16 up)
 		return 0;
 
 skip_rgb:
-	mod_delayed_work(system_wq, &oxp_mcu_init, msecs_to_jiffies(50));
-
+	bmap_1 = devm_kzalloc(&hdev->dev, sizeof(struct oxp_bmap_page_1), GFP_KERNEL);
+	if (!bmap_1)
+		return dev_err_probe(&hdev->dev, -ENOMEM,
+				     "Unable to allocate button map page 1\n");
+
+	bmap_2 = devm_kzalloc(&hdev->dev, sizeof(struct oxp_bmap_page_2), GFP_KERNEL);
+	if (!bmap_2)
+		return dev_err_probe(&hdev->dev, -ENOMEM,
+				     "Unable to allocate button map page 2\n");
+
+	drvdata.bmap_1 = bmap_1;
+	drvdata.bmap_2 = bmap_2;
+	oxp_reset_buttons();
 	drvdata.gamepad_mode = OXP_GP_MODE_XINPUT;
+	mod_delayed_work(system_wq, &oxp_mcu_init, msecs_to_jiffies(50));
 
 	ret = devm_device_add_group(&hdev->dev, &oxp_cfg_attrs_group);
 	if (ret)
-- 
2.53.0


^ permalink raw reply related

* [GIT PULL] Documentation for 7.1
From: Jonathan Corbet @ 2026-04-12 21:51 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, linux-doc, Shuah Khan

The following changes since commit 6de23f81a5e08be8fbf5e8d7e9febc72a5b5f27f:

  Linux 7.0-rc1 (2026-02-22 13:18:59 -0800)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/docs/linux.git tags/docs-7.1

for you to fetch changes up to 40a3f6c5e2604ff7c007da6628120529569e950c:

  Documentation: core-api: real-time: correct spelling (2026-04-12 13:11:50 -0600)

----------------------------------------------------------------
A busier cycle than I had expected for docs, including:

- Translations: some overdue updates to the Japanese translations, Chinese
  translations for some of the Rust documentation, and the beginnings of a
  Portuguese translation.

- New documents covering CPU isolation, managed interrupts, debugging
  Python GDB scripts, and more.

- More tooling work from Mauro, reducing docs-build warnings, adding self
  tests, improving man-page output, bringing in a proper C tokenizer to
  replace (some of) the mess of kernel-doc regexes, and more.

- Update and synchronize changes.rst and scripts/ver_linux, and put both
  into alphabetical order.

...and a long list of documentation updates, typo fixes, and general
improvements.

Expect a simple conflict with the Rust tree due to the sorting of
Documentation/process/changes.rst.

----------------------------------------------------------------
Akira Yokosawa (1):
      docs/ja_JP: submitting-patches: Amend "Describe your changes"

Akiyoshi Kurita (2):
      docs: ja_JP: process: translate first half of 'Describe your changes'
      docs: ja_JP: process: translate second half of 'Describe your changes'

Ariful Islam Shoikot (3):
      Documentation: process: backporting: fix missing subject
      Documentation/mm: Fix typo in NUMA paragraph
      Documentation/mm/hwpoison.rst: fix typos and grammar.

Askar Safin (1):
      doc: early_userspace_support.rst: trivial fix: directory -> file

Ben Guo (4):
      docs/zh_CN: update rust/arch-support.rst translation
      docs/zh_CN: update rust/coding-guidelines.rst translation
      docs/zh_CN: update rust/quick-start.rst translation
      docs/zh_CN: update rust/index.rst translation

Bhaskar Chowdhury (1):
      Doc: dev-tools: Added Gentoo in the Operating system list under Getting Coccinelle

Costa Shulyupin (2):
      Documentation/rtla: Add hwnoise to main page
      Documentation/rtla: Convert links to RST format

Daniel Castro (1):
      docs: pt_BR: translate process/1.Intro.rst

Daniel Pereira (8):
      docs: pt_BR: Add initial Portuguese translation
      docs: pt_BR: translate process/changes.rst
      docs: pt_BR: Add translation for maintainer-handbooks
      docs: pt_BR: add netdev and maintainer handbook translations
      docs/pt_BR: translation of maintainer-soc.rst
      docs/pt_BR: translation of maintainer-soc-clean-dts.rst
      docs: pt_BR: Add translation for process/conclave.rst
      docs: pt_BR: Add translation for KVM x86 maintainer guide

Daniel Tang (1):
      docs: path-lookup: fix unrenamed WALK_GET

David Hildenbrand (Arm) (1):
      docs: filesystems: clarify KernelPageSize vs. MMUPageSize in smaps

Florian Fainelli (1):
      Documentation: Provide hints on how to debug Python GDB scripts

Frederic Weisbecker (1):
      doc: Add CPU Isolation documentation

Gleb Golovko (1):
      docs: fix typo in zoran driver documentation

Haowen Hu (1):
      docs/zh_TW: remove trailing space

Haoyang LIU (2):
      tools/docs/checktransupdate.py: add support for scanning directory
      tools/docs/checktransupdate.py: fix all issues reported by pylint

Harry Wentland (1):
      scripts/checkpatch: add Assisted-by: tag validation

Jonathan Corbet (11):
      docs: Fix an erroneous reference to sphinx.rst
      docs: remove unneeded maintainer_handbooks_main label
      Merge branch 'mauro' into docs-mw
      Merge branch 'mauro' into docs-mw
      Merge branch 'mauro' into docs-mw
      Merge branch 'mauro' into docs-mw
      Merge branch 'docs-fixes' into docs-mw
      Revert "scripts: ver_linux: expand and fix list"
      Revert "scripts/checkpatch: add Assisted-by: tag validation"
      docs: add an Assisted-by mention to submitting-patches.rst
      Merge tag 'Chinese-docs-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/alexs/linux into tmp

Julia Lawall (1):
      coccinelle: update Coccinelle URL

Kuan-Wei Chiu (2):
      docs: interconnect: Add provider APIs to documentation
      docs: interconnect: Document consumer APIs and drop outdated text

LIU Haoyang (2):
      docs/dev-tools: fix a broken URL in dev-tools/coccinelle.rst
      docs/zh_CN: fix an inconsistent statement in dev-tools/testing-overview

Li RongQing (1):
      Documentation/kernel-parameters: fix architecture alignment for pt, nopt, and nobypass

Manuel Cortez (1):
      docs: fix typos and duplicated words across documentation

Manuel Ebner (3):
      scripts: ver_linux: expand and fix list
      docs: changes/ver_linux: fix entries and add several tools
      docs: changes.rst and ver_linux: sort the lists

Matteo Martelli (1):
      sched_deadline, docs: fix cpuset example for cgroupv2

Mauro Carvalho Chehab (80):
      docs: kdoc_re: add support for groups()
      docs: kdoc_re: don't go past the end of a line
      docs: kdoc_parser: move var transformers to the beginning
      docs: kdoc_parser: don't mangle with function defines
      docs: kdoc_parser: fix variable regexes to work with size_t
      docs: kdoc_parser: fix the default_value logic for variables
      docs: kdoc_parser: don't exclude defaults from prototype
      docs: kdoc_parser: fix parser to support multi-word types
      docs: kdoc_parser: add support for LIST_HEAD
      docs: kdoc_re: better show KernRe() at documentation
      docs: kdoc_parser: move transform lists to a separate file
      docs: kdoc_re: handle strings and escape chars on NextMatch
      docs: kdoc_re: don't recompile NestedMatch regex every time
      docs: kdoc_re: Change NestedMath args replacement to \0
      docs: kdoc_re: make NestedMatch use KernRe
      docs: kdoc_parser: move nested match transforms to xforms_lists.py
      docs: kdoc_files: allows the caller to use a different xforms class
      docs: kdoc_files: document KernelFiles() ABI
      docs: sphinx-build-wrapper: better handle troff .TH markups
      docs: sphinx-build-wrapper: don't allow "/" on file names
      docs: kdoc_output: use a method to emit the .TH header
      docs: kdoc_output: remove extra attribute on man .TH headers
      docs: kdoc_output: use a single manual for everything
      docs: kdoc_output: don't use a different modulename for functions
      docs: kdoc_output: fix naming for DOC markups
      docs: kdoc_output: describe the class init parameters
      docs: kdoc_output: pick a better default for modulename
      docs: kdoc_output: Change the logic to handle man highlight
      docs: kdoc_output: add a logic to handle tables inside kernel-doc markups
      docs: kdoc_output: add support to handle code blocks
      docs: kdoc_output: better handle lists
      docs: python: add helpers to run unit tests
      unittests: add a testbench to check public/private kdoc comments
      docs: kdoc: don't add broken comments inside prototypes
      docs: kdoc: properly handle empty enum arguments
      docs: add a C tokenizer to be used by kernel-doc
      docs: kdoc: use tokenizer to handle comments on structs
      unittests: test_private: modify it to use CTokenizer directly
      unittests: test_tokenizer: check if the tokenizer works
      unittests: add a runner to execute all unittests
      docs: kdoc: create a CMatch to match nested C blocks
      tools: unittests: add tests for CMatch
      docs: c_lex: properly implement a sub() method for CMatch
      unittests: test_cmatch: add tests for sub()
      docs: kdoc: replace NestedMatch with CMatch
      docs: kdoc_re: get rid of NestedMatch class
      docs: xforms_lists: handle struct_group directly
      docs: xforms_lists: better evaluate struct_group macros
      docs: c_lex: setup a logger to report tokenizer issues
      docs: kernel-doc.rst: document private: scope propagation
      docs: kdoc: ensure that comments are dropped before calling split_struct_proto()
      docs: kdoc_parser: avoid tokenizing structs everytime
      docs: xforms_lists: use CMatch for all identifiers
      unittests: test_tokenizer: better handle mismatch error
      docs: kdoc_re: better represent long regular expressions
      docs: kdoc: add c_lex to generated documentation
      docs: kdoc_files: use a class to group config parameters
      docs: kdoc_files: move output symbols logic to kdoc_output
      docs: kdoc_item: fix initial value for parameterdesc_start_lines
      docs: kdoc_item: add support to generate a KdocItem from a dict
      docs: kdoc_item: fix a typo on sections_start_lines
      docs: unittests: add a parser to test kernel-doc parser logic
      docs: add a schema to help creating unittests for kernel-doc
      docs: add a simple kdoc-test.yaml together with a validation tool
      docs: test_kdoc_parser: add support for dynamic test creation
      docs: add a new file to write kernel-doc output to a YAML file
      docs: kernel-doc: add support to store output on a YAML file
      MAINTAINERS: update documentation scripts to add unittests
      unittests: test_kdoc_parser: add command line arg to read a YAML file
      docs: tools: include kdoc_yaml_file at documentation
      docs: kdoc_yaml_file: add a representer to make strings look nicer
      docs: kdoc-test.yaml: add more tests
      docs: kdoc_output: fix handling of simple tables
      docs: kdoc: better handle source when producing YAML output
      docs: kdoc_yaml_file: use a better name for the tests
      docs: kdoc_output: raise an error if full_proto not available for var
      docs: c_lex.py: store logger on its data
      doc tools: better handle KBUILD_VERBOSE
      tools: unittest_helper: add a quiet mode
      docs: kdoc_diff: add a helper tool to help checking kdoc regressions

Miquel Sabaté Solà (1):
      documentation: update arch features

Pranav Kharche (1):
      docs: kernel-parameters: Fix repeated word in initramfs_options entry

Praveen Kumar Singh (1):
      docs: memory-hotplug: fix typo 'fo' -> 'for' in NODE_ADDED_FIRST_MEMORY description

Randy Dunlap (5):
      linux-next: update maintainer info.
      docs: admin-guide: update tiny script for number of taint flags
      docs: kdoc_parser: handle struct member macro VIRTIO_DECLARE_FEATURES(name)
      docs: xforms_lists: ignore context analysis and lock attributes
      Docs: hid: intel-ish-hid: make long URL usable

Ricardo Ungerer (1):
      jobserver: Fix typo in docstring

Rito Rhymes (5):
      docs: use logo.svg as favicon
      docs: allow inline literals in paragraphs to wrap to prevent overflow
      docs: contain horizontal overflow in C API descriptions
      docs: allow long table reference links to wrap and prevent overflow
      docs: allow long links to wrap per character to prevent page overflow

Ryan Cheevers (1):
      docs: fix typo in housekeeping

Sebastian Andrzej Siewior (1):
      Documentation: Add managed interrupts

Shubham Chakraborty (2):
      docs: sysctl: add documentation for crypto and debug
      docs: sysctl: Add documentation for /proc/sys/xen/

Song Hongyi (1):
      docs/zh_CN: sync process/2.Process.rst with English version

Steven Rostedt (1):
      tracing: Documentation: Update histogram-design.rst for fn() handling

Sukrut Heroorkar (1):
      Documentation: core-api: real-time: correct spelling

Thorsten Blum (1):
      Documentation/maintainer-tip: Fix grammar

Thorsten Leemhuis (6):
      docs: handling-regressions: add, trim, and sort quotes from Linus
      docs: reporting-issues: mention text is best viewed rendered
      docs: reporting-issues: tweak the reference section intro
      docs: reporting-issues: add conclusion to the step-by-step guide
      docs: verify-bugs-… and quickly-build-…: improve feedback section
      docs: reporting-issues: create a proper appendix explaining specialties

Tommaso Cucinotta (1):
      sched/deadline: document new sched_getattr() feature for retrieving current parameters for DEADLINE tasks

Tomás Pando (1):
      docs: driver-api: fix 6 spelling typos in Documentation/driver-api

Wolfram Sang (1):
      Documentation: seq_file: drop 2.6 reference

Zenghui Yu (Huawei) (1):
      docs: proc: remove description of prof_cpu_mask

fangqiurong (1):
      sched/doc: Update yield_task description in sched-design-CFS

h3288824963 (1):
      Documentation: printk: Add section about avoiding lockups

 Documentation/admin-guide/bcache.rst               |    2 +-
 Documentation/admin-guide/cpu-isolation.rst        |  357 ++++
 Documentation/admin-guide/index.rst                |    1 +
 Documentation/admin-guide/kernel-parameters.txt    |    8 +-
 .../admin-guide/pm/intel-speed-select.rst          |    2 +-
 .../admin-guide/quickly-build-trimmed-linux.rst    |   14 +-
 Documentation/admin-guide/reporting-issues.rst     |  210 +--
 Documentation/admin-guide/sysctl/crypto.rst        |   47 +
 Documentation/admin-guide/sysctl/debug.rst         |   52 +
 Documentation/admin-guide/sysctl/index.rst         |    9 +-
 Documentation/admin-guide/sysctl/xen.rst           |   31 +
 Documentation/admin-guide/tainted-kernels.rst      |    2 +-
 .../verify-bugs-and-bisect-regressions.rst         |   15 +-
 Documentation/conf.py                              |    1 +
 Documentation/core-api/housekeeping.rst            |    2 +-
 Documentation/core-api/irq/index.rst               |    1 +
 Documentation/core-api/irq/managed_irq.rst         |  116 ++
 Documentation/core-api/memory-hotplug.rst          |    2 +-
 Documentation/core-api/printk-basics.rst           |   36 +
 .../core-api/real-time/architecture-porting.rst    |    2 +-
 Documentation/core-api/real-time/differences.rst   |    2 +-
 Documentation/dev-tools/coccinelle.rst             |    5 +-
 Documentation/doc-guide/kernel-doc.rst             |    6 +
 Documentation/driver-api/acpi/acpi-drivers.rst     |    2 +-
 .../driver-api/cxl/platform/acpi/cedt.rst          |    2 +-
 .../driver-api/cxl/platform/bios-and-efi.rst       |    2 +-
 Documentation/driver-api/dmaengine/pxa_dma.rst     |    2 +-
 .../early-userspace/early_userspace_support.rst    |    2 +-
 Documentation/driver-api/interconnect.rst          |   16 +-
 Documentation/driver-api/libata.rst                |    2 +-
 Documentation/driver-api/media/drivers/zoran.rst   |    2 +-
 Documentation/driver-api/pci/p2pdma.rst            |    2 +-
 .../features/perf/perf-regs/arch-support.txt       |    2 +-
 .../features/perf/perf-stackdump/arch-support.txt  |    2 +-
 Documentation/filesystems/path-lookup.rst          |    2 +-
 Documentation/filesystems/proc.rst                 |   52 +-
 Documentation/filesystems/seq_file.rst             |    2 +-
 Documentation/hid/intel-ish-hid.rst                |    4 +-
 Documentation/mm/hwpoison.rst                      |    2 +-
 Documentation/mm/numa.rst                          |    2 +-
 Documentation/process/2.Process.rst                |    2 +-
 Documentation/process/backporting.rst              |    2 +-
 Documentation/process/changes.rst                  |   58 +-
 .../process/debugging/gdb-kernel-debugging.rst     |    9 +
 Documentation/process/handling-regressions.rst     |  695 +++++---
 Documentation/process/maintainer-handbooks.rst     |    2 -
 Documentation/process/maintainer-tip.rst           |    2 +-
 Documentation/process/submitting-patches.rst       |   12 +-
 Documentation/scheduler/sched-deadline.rst         |   22 +-
 Documentation/scheduler/sched-design-CFS.rst       |    5 +-
 Documentation/sphinx-static/custom.css             |   26 +
 Documentation/sphinx/translations.py               |    1 +
 Documentation/tools/kdoc_ancillary.rst             |   17 +
 Documentation/tools/kdoc_parser.rst                |    8 +
 Documentation/tools/python.rst                     |    2 +
 Documentation/tools/rtla/rtla-hwnoise.rst          |    2 +-
 Documentation/tools/rtla/rtla-osnoise-hist.rst     |    2 +-
 Documentation/tools/rtla/rtla-osnoise-top.rst      |    2 +-
 Documentation/tools/rtla/rtla-osnoise.rst          |    2 +-
 Documentation/tools/rtla/rtla-timerlat-hist.rst    |    2 +-
 Documentation/tools/rtla/rtla-timerlat-top.rst     |    2 +-
 Documentation/tools/rtla/rtla-timerlat.rst         |    2 +-
 Documentation/tools/rtla/rtla.rst                  |    6 +-
 Documentation/tools/unittest.rst                   |   24 +
 Documentation/trace/histogram-design.rst           |   20 +-
 Documentation/translations/index.rst               |    4 +-
 .../translations/it_IT/process/4.Coding.rst        |    3 +-
 .../ja_JP/process/submitting-patches.rst           |  129 +-
 .../translations/pt_BR/disclaimer-pt_BR.rst        |   11 +
 Documentation/translations/pt_BR/index.rst         |   77 +
 .../translations/pt_BR/process/1.Intro.rst         |  269 ++++
 .../translations/pt_BR/process/changes.rst         |  576 +++++++
 .../translations/pt_BR/process/conclave.rst        |   40 +
 Documentation/translations/pt_BR/process/howto.rst |  637 ++++++++
 .../pt_BR/process/maintainer-handbooks.rst         |   18 +
 .../pt_BR/process/maintainer-kvm-x86.rst           |  435 +++++
 .../pt_BR/process/maintainer-netdev.rst            |  596 +++++++
 .../pt_BR/process/maintainer-soc-clean-dts.rst     |   28 +
 .../translations/pt_BR/process/maintainer-soc.rst  |  222 +++
 .../translations/sp_SP/process/4.Coding.rst        |    3 +-
 .../sp_SP/process/submitting-patches.rst           |    2 +-
 .../sp_SP/scheduler/sched-design-CFS.rst           |    6 +-
 .../zh_CN/dev-tools/testing-overview.rst           |    2 +-
 .../translations/zh_CN/process/2.Process.rst       |   56 +-
 .../translations/zh_CN/process/4.Coding.rst        |    2 +-
 .../translations/zh_CN/rust/arch-support.rst       |    9 +-
 .../translations/zh_CN/rust/coding-guidelines.rst  |  262 ++-
 Documentation/translations/zh_CN/rust/index.rst    |   17 -
 .../translations/zh_CN/rust/quick-start.rst        |  190 ++-
 .../zh_CN/scheduler/sched-design-CFS.rst           |    4 +-
 .../translations/zh_TW/admin-guide/README.rst      |    2 +-
 .../translations/zh_TW/process/4.Coding.rst        |    2 +-
 MAINTAINERS                                        |   10 +-
 scripts/ver_linux                                  |   63 +-
 tools/docs/checktransupdate.py                     |   22 +-
 tools/docs/kdoc_diff                               |  508 ++++++
 tools/docs/kernel-doc                              |   49 +-
 tools/docs/sphinx-build-wrapper                    |   17 +-
 tools/lib/python/jobserver.py                      |    4 +-
 tools/lib/python/kdoc/c_lex.py                     |  662 ++++++++
 tools/lib/python/kdoc/kdoc_files.py                |  151 +-
 tools/lib/python/kdoc/kdoc_item.py                 |   45 +-
 tools/lib/python/kdoc/kdoc_output.py               |  324 +++-
 tools/lib/python/kdoc/kdoc_parser.py               |  292 ++--
 tools/lib/python/kdoc/kdoc_re.py                   |  205 +--
 tools/lib/python/kdoc/kdoc_yaml_file.py            |  178 ++
 tools/lib/python/kdoc/xforms_lists.py              |  153 ++
 tools/lib/python/unittest_helper.py                |  363 +++++
 tools/unittests/kdoc-test-schema.yaml              |  156 ++
 tools/unittests/kdoc-test.yaml                     | 1698 ++++++++++++++++++++
 tools/unittests/run.py                             |   17 +
 tools/unittests/test_cmatch.py                     |  821 ++++++++++
 tools/unittests/test_kdoc_parser.py                |  560 +++++++
 tools/unittests/test_kdoc_test_schema.py           |   94 ++
 tools/unittests/test_tokenizer.py                  |  469 ++++++
 115 files changed, 11356 insertions(+), 1057 deletions(-)
 create mode 100644 Documentation/admin-guide/cpu-isolation.rst
 create mode 100644 Documentation/admin-guide/sysctl/crypto.rst
 create mode 100644 Documentation/admin-guide/sysctl/debug.rst
 create mode 100644 Documentation/admin-guide/sysctl/xen.rst
 create mode 100644 Documentation/core-api/irq/managed_irq.rst
 create mode 100644 Documentation/tools/unittest.rst
 create mode 100644 Documentation/translations/pt_BR/disclaimer-pt_BR.rst
 create mode 100644 Documentation/translations/pt_BR/index.rst
 create mode 100644 Documentation/translations/pt_BR/process/1.Intro.rst
 create mode 100644 Documentation/translations/pt_BR/process/changes.rst
 create mode 100644 Documentation/translations/pt_BR/process/conclave.rst
 create mode 100644 Documentation/translations/pt_BR/process/howto.rst
 create mode 100644 Documentation/translations/pt_BR/process/maintainer-handbooks.rst
 create mode 100644 Documentation/translations/pt_BR/process/maintainer-kvm-x86.rst
 create mode 100644 Documentation/translations/pt_BR/process/maintainer-netdev.rst
 create mode 100644 Documentation/translations/pt_BR/process/maintainer-soc-clean-dts.rst
 create mode 100644 Documentation/translations/pt_BR/process/maintainer-soc.rst
 create mode 100755 tools/docs/kdoc_diff
 create mode 100644 tools/lib/python/kdoc/c_lex.py
 create mode 100644 tools/lib/python/kdoc/kdoc_yaml_file.py
 create mode 100644 tools/lib/python/kdoc/xforms_lists.py
 create mode 100755 tools/lib/python/unittest_helper.py
 create mode 100644 tools/unittests/kdoc-test-schema.yaml
 create mode 100644 tools/unittests/kdoc-test.yaml
 create mode 100755 tools/unittests/run.py
 create mode 100755 tools/unittests/test_cmatch.py
 create mode 100755 tools/unittests/test_kdoc_parser.py
 create mode 100755 tools/unittests/test_kdoc_test_schema.py
 create mode 100755 tools/unittests/test_tokenizer.py

^ permalink raw reply

* Re: [PATCH mm-unstable v15 06/13] mm/khugepaged: skip collapsing mTHP to smaller orders
From: Nico Pache @ 2026-04-13  1:38 UTC (permalink / raw)
  To: David Hildenbrand (Arm)
  Cc: linux-doc, linux-kernel, linux-mm, linux-trace-kernel, aarcange,
	akpm, anshuman.khandual, apopple, baohua, baolin.wang, byungchul,
	catalin.marinas, cl, corbet, dave.hansen, dev.jain, gourry,
	hannes, hughd, jack, jackmanb, jannh, jglisse, joshua.hahnjy, kas,
	lance.yang, Liam.Howlett, lorenzo.stoakes, mathieu.desnoyers,
	matthew.brost, mhiramat, mhocko, peterx, pfalcato, rakie.kim,
	raquini, rdunlap, richard.weiyang, rientjes, rostedt, rppt,
	ryan.roberts, shivankg, sunnanyong, surenb, thomas.hellstrom,
	tiwai, usamaarif642, vbabka, vishal.moola, wangkefeng.wang, will,
	willy, yang, ying.huang, ziy, zokeefe
In-Reply-To: <94290ad5-f63d-4fa7-a898-dcfe6cd9998b@kernel.org>

On Thu, Mar 12, 2026 at 3:00 PM David Hildenbrand (Arm)
<david@kernel.org> wrote:
>
> On 2/26/26 04:24, Nico Pache wrote:
> > khugepaged may try to collapse a mTHP to a smaller mTHP, resulting in
> > some pages being unmapped. Skip these cases until we have a way to check
> > if its ok to collapse to a smaller mTHP size (like in the case of a
> > partially mapped folio).
> >
> > This patch is inspired by Dev Jain's work on khugepaged mTHP support [1].
> >
> > [1] https://lore.kernel.org/lkml/20241216165105.56185-11-dev.jain@arm.com/
> >
> > Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> > Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> > Co-developed-by: Dev Jain <dev.jain@arm.com>
> > Signed-off-by: Dev Jain <dev.jain@arm.com>
> > Signed-off-by: Nico Pache <npache@redhat.com>
> > ---
> >  mm/khugepaged.c | 8 ++++++++
> >  1 file changed, 8 insertions(+)
> >
> > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > index fb3ba8fe5a6c..c739f26dd61e 100644
> > --- a/mm/khugepaged.c
> > +++ b/mm/khugepaged.c
> > @@ -638,6 +638,14 @@ static enum scan_result __collapse_huge_page_isolate(struct vm_area_struct *vma,
> >                               goto out;
> >                       }
> >               }
> > +             /*
> > +              * TODO: In some cases of partially-mapped folios, we'd actually
> > +              * want to collapse.
> > +              */
> > +             if (!is_pmd_order(order) && folio_order(folio) >= order) {
> > +                     result = SCAN_PTE_MAPPED_HUGEPAGE;
> > +                     goto out;
> > +             }
> >
> >               if (folio_test_large(folio)) {
> >                       struct folio *f;
>
> Why aren't we doing the same in hpage_collapse_scan_pmd() ?

We can't do this in the scan phase because we are not yet aware of the
order we want to collapse to.

The scan phase builds the bitmap (if mthp sizes are enabled). I tried
to think if there were any similar checks we could perform; the only
one that came to mind is whether the current folio size exceeds the
highest enabled folio size. Is that worth checking?

Cheers,
-- Nico

>
> --
> Cheers,
>
> David
>


^ permalink raw reply

* Re: [PATCH mm-unstable v15 07/13] mm/khugepaged: add per-order mTHP collapse failure statistics
From: Nico Pache @ 2026-04-13  2:48 UTC (permalink / raw)
  To: Lorenzo Stoakes (Oracle)
  Cc: linux-doc, linux-kernel, linux-mm, linux-trace-kernel, aarcange,
	akpm, anshuman.khandual, apopple, baohua, baolin.wang, byungchul,
	catalin.marinas, cl, corbet, dave.hansen, david, dev.jain, gourry,
	hannes, hughd, jack, jackmanb, jannh, jglisse, joshua.hahnjy, kas,
	lance.yang, Liam.Howlett, lorenzo.stoakes, mathieu.desnoyers,
	matthew.brost, mhiramat, mhocko, peterx, pfalcato, rakie.kim,
	raquini, rdunlap, richard.weiyang, rientjes, rostedt, rppt,
	ryan.roberts, shivankg, sunnanyong, surenb, thomas.hellstrom,
	tiwai, usamaarif642, vbabka, vishal.moola, wangkefeng.wang, will,
	willy, yang, ying.huang, ziy, zokeefe
In-Reply-To: <c832d503-8b8c-487a-b61a-df74a3057308@lucifer.local>

On Tue, Mar 17, 2026 at 11:05 AM Lorenzo Stoakes (Oracle)
<ljs@kernel.org> wrote:
>
> On Wed, Feb 25, 2026 at 08:25:04PM -0700, Nico Pache wrote:
> > Add three new mTHP statistics to track collapse failures for different
> > orders when encountering swap PTEs, excessive none PTEs, and shared PTEs:
> >
> > - collapse_exceed_swap_pte: Increment when mTHP collapse fails due to swap
> >       PTEs
> >
> > - collapse_exceed_none_pte: Counts when mTHP collapse fails due to
> >       exceeding the none PTE threshold for the given order
> >
> > - collapse_exceed_shared_pte: Counts when mTHP collapse fails due to shared
> >       PTEs
> >
> > These statistics complement the existing THP_SCAN_EXCEED_* events by
> > providing per-order granularity for mTHP collapse attempts. The stats are
> > exposed via sysfs under
> > `/sys/kernel/mm/transparent_hugepage/hugepages-*/stats/` for each
> > supported hugepage size.
> >
> > As we currently dont support collapsing mTHPs that contain a swap or
> > shared entry, those statistics keep track of how often we are
> > encountering failed mTHP collapses due to these restrictions.
> >
> > Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> > Signed-off-by: Nico Pache <npache@redhat.com>
> > ---
> >  Documentation/admin-guide/mm/transhuge.rst | 24 ++++++++++++++++++++++
> >  include/linux/huge_mm.h                    |  3 +++
> >  mm/huge_memory.c                           |  7 +++++++
> >  mm/khugepaged.c                            | 16 ++++++++++++---
> >  4 files changed, 47 insertions(+), 3 deletions(-)
> >
> > diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
> > index c51932e6275d..eebb1f6bbc6c 100644
> > --- a/Documentation/admin-guide/mm/transhuge.rst
> > +++ b/Documentation/admin-guide/mm/transhuge.rst
> > @@ -714,6 +714,30 @@ nr_anon_partially_mapped
> >         an anonymous THP as "partially mapped" and count it here, even though it
> >         is not actually partially mapped anymore.
> >
> > +collapse_exceed_none_pte
> > +       The number of collapse attempts that failed due to exceeding the
> > +       max_ptes_none threshold. For mTHP collapse, Currently only max_ptes_none
> > +       values of 0 and (HPAGE_PMD_NR - 1) are supported. Any other value will
> > +       emit a warning and no mTHP collapse will be attempted. khugepaged will
>
> It's weird to document this here but not elsewhere in the document? I mean I
> made this comment on the documentation patch also.

I can add some more documentation, but TBH I don't really know where or
what else to put. I checked a few of these other per-mTHP stats, and
none are referenced elsewhere. If anything, these 3 additions are the
best-documented ones.

>
> Not sure if I missed you adding it to another bit of the docs? :)
>
> > +       try to collapse to the largest enabled (m)THP size; if it fails, it will
> > +       try the next lower enabled mTHP size. This counter records the number of
> > +       times a collapse attempt was skipped for exceeding the max_ptes_none
> > +       threshold, and khugepaged will move on to the next available mTHP size.
> > +
> > +collapse_exceed_swap_pte
> > +       The number of anonymous mTHP PTE ranges which were unable to collapse due
> > +       to containing at least one swap PTE. Currently khugepaged does not
> > +       support collapsing mTHP regions that contain a swap PTE. This counter can
> > +       be used to monitor the number of khugepaged mTHP collapses that failed
> > +       due to the presence of a swap PTE.
> > +
> > +collapse_exceed_shared_pte
> > +       The number of anonymous mTHP PTE ranges which were unable to collapse due
> > +       to containing at least one shared PTE. Currently khugepaged does not
> > +       support collapsing mTHP PTE ranges that contain a shared PTE. This
> > +       counter can be used to monitor the number of khugepaged mTHP collapses
> > +       that failed due to the presence of a shared PTE.
>
> All of these talk about 'ranges' that could be of any size. Are these useful
> metrics? Counting a bunch of failures and not knowing if they are 256 KB
> failures or 16 KB failures or whatever is maybe not so useful information?

These are per-mTHP size statistics. If you look at the surrounding
examples and docs this all makes more sense.

>
> Also, from the code, aren't you treating PMD events the same as mTHP ones from
> the point of view of these counters? Maybe worth documenting that?

IIUC, yes, but that is true of all of these:

```
In /sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/stats, There are
also individual counters for each huge page size, which can be utilized to
monitor the system's effectiveness in providing huge pages for usage. Each
counter has its own corresponding file.
```

>
> > +
> >  As the system ages, allocating huge pages may be expensive as the
> >  system uses memory compaction to copy data around memory to free a
> >  huge page for use. There are some counters in ``/proc/vmstat`` to help
> > diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> > index 9941fc6d7bd8..e8777bb2347d 100644
> > --- a/include/linux/huge_mm.h
> > +++ b/include/linux/huge_mm.h
> > @@ -144,6 +144,9 @@ enum mthp_stat_item {
> >       MTHP_STAT_SPLIT_DEFERRED,
> >       MTHP_STAT_NR_ANON,
> >       MTHP_STAT_NR_ANON_PARTIALLY_MAPPED,
> > +     MTHP_STAT_COLLAPSE_EXCEED_SWAP,
> > +     MTHP_STAT_COLLAPSE_EXCEED_NONE,
> > +     MTHP_STAT_COLLAPSE_EXCEED_SHARED,
> >       __MTHP_STAT_COUNT
> >  };
> >
> > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > index 228f35e962b9..1049a207a257 100644
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -642,6 +642,10 @@ DEFINE_MTHP_STAT_ATTR(split_failed, MTHP_STAT_SPLIT_FAILED);
> >  DEFINE_MTHP_STAT_ATTR(split_deferred, MTHP_STAT_SPLIT_DEFERRED);
> >  DEFINE_MTHP_STAT_ATTR(nr_anon, MTHP_STAT_NR_ANON);
> >  DEFINE_MTHP_STAT_ATTR(nr_anon_partially_mapped, MTHP_STAT_NR_ANON_PARTIALLY_MAPPED);
> > +DEFINE_MTHP_STAT_ATTR(collapse_exceed_swap_pte, MTHP_STAT_COLLAPSE_EXCEED_SWAP);
> > +DEFINE_MTHP_STAT_ATTR(collapse_exceed_none_pte, MTHP_STAT_COLLAPSE_EXCEED_NONE);
> > +DEFINE_MTHP_STAT_ATTR(collapse_exceed_shared_pte, MTHP_STAT_COLLAPSE_EXCEED_SHARED);
>
> Is there a reason there's such a difference between the names and the actual
> enum names?

Good point, I didn't think about that. I can update those as long as
they don't conflict with something else (I forget why I named them
like this).

>
> > +
> >
> >  static struct attribute *anon_stats_attrs[] = {
> >       &anon_fault_alloc_attr.attr,
> > @@ -658,6 +662,9 @@ static struct attribute *anon_stats_attrs[] = {
> >       &split_deferred_attr.attr,
> >       &nr_anon_attr.attr,
> >       &nr_anon_partially_mapped_attr.attr,
> > +     &collapse_exceed_swap_pte_attr.attr,
> > +     &collapse_exceed_none_pte_attr.attr,
> > +     &collapse_exceed_shared_pte_attr.attr,
> >       NULL,
> >  };
> >
> > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > index c739f26dd61e..a6cf90e09e4a 100644
> > --- a/mm/khugepaged.c
> > +++ b/mm/khugepaged.c
> > @@ -595,7 +595,9 @@ static enum scan_result __collapse_huge_page_isolate(struct vm_area_struct *vma,
> >                               continue;
> >                       } else {
> >                               result = SCAN_EXCEED_NONE_PTE;
> > -                             count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
> > +                             if (is_pmd_order(order))
> > +                                     count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
> > +                             count_mthp_stat(order, MTHP_STAT_COLLAPSE_EXCEED_NONE);
>
> It's a bit gross to have separate stats for both thp and mthp but maybe
> unavoidable from a legacy stand point.

I agree but that's how it currently is. Perhaps we can add this to the
TODO list for THP work.

>
> Why are we dropping the _PTE suffix?

I follow the convention that the other mTHP stats follow for example
(MTHP_STAT_SPLIT_DEFERRED)

>
> >                               goto out;
> >                       }
> >               }
> > @@ -631,10 +633,17 @@ static enum scan_result __collapse_huge_page_isolate(struct vm_area_struct *vma,
> >                        * shared may cause a future higher order collapse on a
> >                        * rescan of the same range.
> >                        */
> > -                     if (!is_pmd_order(order) || (cc->is_khugepaged &&
> > -                         shared > khugepaged_max_ptes_shared)) {
>
> OK losing track here :) as the series sadly doesn't currently apply so can't
> browser file as is.
>
> In the code I'm looking at, there's also a ++shared here that I guess another
> patch removed?
>
> Is this in the folio_maybe_mapped_shared() branch?

yes the counting is now done at the top of that branch.

>
> > +                     if (!is_pmd_order(order)) {
> > +                             result = SCAN_EXCEED_SHARED_PTE;
> > +                             count_mthp_stat(order, MTHP_STAT_COLLAPSE_EXCEED_SHARED);
> > +                             goto out;
> > +                     }
> > +
> > +                     if (cc->is_khugepaged &&
> > +                         shared > khugepaged_max_ptes_shared) {
> >                               result = SCAN_EXCEED_SHARED_PTE;
> >                               count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
> > +                             count_mthp_stat(order, MTHP_STAT_COLLAPSE_EXCEED_SHARED);
> >                               goto out;
>
> Anyway I'm a bit lost on this logic until a respin but this looks like a LOT of
> code duplication. I see David alluded to a refactoring so maybe what he suggests
> will help (not had a chance to check what it is specifically :P)

Yep :) should look cleaner in the next one. Although it's quite a bit
of refactoring. I'll be praying that I got it right on the first go,
and I put all the other pieces in the desired spot.

>
> >                       }
> >               }
> > @@ -1081,6 +1090,7 @@ static enum scan_result __collapse_huge_page_swapin(struct mm_struct *mm,
> >                * range.
> >                */
> >               if (!is_pmd_order(order)) {
> > +                     count_mthp_stat(order, MTHP_STAT_COLLAPSE_EXCEED_SWAP);
>
> Hmm I thought we were incrementing mthp stats for pmd sized also?

Yes, we are supposed to. I've already refactored and it looks fine
there... perhaps I missed this one in this version!

Cheers,

-- Nico

>
> >                       pte_unmap(pte);
> >                       mmap_read_unlock(mm);
> >                       result = SCAN_EXCEED_SWAP_PTE;
> > --
> > 2.53.0
> >
>
> Cheers, Lorenzo
>


^ permalink raw reply

* Re: [PATCH v2 1/3] mm/memory-failure: report MF_MSG_KERNEL for reserved pages
From: Miaohe Lin @ 2026-04-13  3:34 UTC (permalink / raw)
  To: Breno Leitao
  Cc: linux-mm, linux-kernel, linux-doc, kernel-team, Naoya Horiguchi,
	Andrew Morton, Jonathan Corbet, Shuah Khan
In-Reply-To: <adkCtR0LzJ7aWfm-@gmail.com>

On 2026/4/10 22:03, Breno Leitao wrote:
> On Tue, Apr 07, 2026 at 10:56:39AM +0800, Miaohe Lin wrote:
>> On 2026/3/31 19:00, Breno Leitao wrote:
>>> When get_hwpoison_page() returns a negative value, distinguish
>>> reserved pages from other failure cases by reporting MF_MSG_KERNEL
>>> instead of MF_MSG_GET_HWPOISON. Reserved pages belong to the kernel
>>> and should be classified accordingly for proper handling by the
>>> panic_on_unrecoverable_memory_failure mechanism.
>>>
>>> Signed-off-by: Breno Leitao <leitao@debian.org>
>>> ---
>>>  mm/memory-failure.c | 6 +++++-
>>>  1 file changed, 5 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
>>> index ee42d4361309..6ff80e01b91a 100644
>>> --- a/mm/memory-failure.c
>>> +++ b/mm/memory-failure.c
>>> @@ -2432,7 +2432,11 @@ int memory_failure(unsigned long pfn, int flags)
>>>  		}
>>>  		goto unlock_mutex;
>>>  	} else if (res < 0) {
>>> -		res = action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED);
>>> +		if (PageReserved(p))
>>> +			res = action_result(pfn, MF_MSG_KERNEL, MF_IGNORED);
>>
>> Is it safe or common to check page flags without holding extra refcnt?
> 
> 
> Yes, this is safe. At this point the page has HWPoison set, preventing
> reallocation.
> 
> PageReserved is an atomic flag test on struct page memory that's always
> valid for online PFNs.
> 
> Reserved pages are inherently stable (kernel text, firmware, etc.) and
> don't change status dynamically.
> 
> This follows the same pattern as the existing is_free_buddy_page(p)
> check a few lines above, which also reads page state without an extra
> refcount.

Got it. Thanks for your explanation.

Thanks.
.

^ permalink raw reply

* Re: [PATCH v2 2/3] mm/memory-failure: add panic_on_unrecoverable_memory_failure sysctl
From: Miaohe Lin @ 2026-04-13  3:42 UTC (permalink / raw)
  To: Breno Leitao
  Cc: linux-mm, linux-kernel, linux-doc, kernel-team, Naoya Horiguchi,
	Andrew Morton, Jonathan Corbet, Shuah Khan
In-Reply-To: <adkFQF7QH8Jc3p3U@gmail.com>

On 2026/4/10 22:17, Breno Leitao wrote:
> On Tue, Apr 07, 2026 at 10:57:36AM +0800, Miaohe Lin wrote:
>> On 2026/3/31 19:00, Breno Leitao wrote:
>>> +	if (sysctl_panic_on_unrecoverable_mf && result == MF_IGNORED &&
>>> +	    (type == MF_MSG_KERNEL || type == MF_MSG_KERNEL_HIGH_ORDER ||
>>> +	     type == MF_MSG_UNKNOWN))
>>> +		panic("Memory failure: %#lx: unrecoverable page", pfn);
>>
>> Will it be better to add a helper here?
> 
> Yes, a helper would make things easier to read and digest. Thanks for
> the feedback. This is what I have in mind:
> 
> commit 36d5b3cbbe6d6abfe3296b7b21135a5f01e743eb
> Author: Breno Leitao <leitao@debian.org>
> Date:   Mon Mar 23 08:00:29 2026 -0700
> 
>     mm/memory-failure: add panic_on_unrecoverable_memory_failure sysctl
>     
>     Add a sysctl that allows the system to panic when an unrecoverable
>     memory failure is detected. This covers kernel pages, high-order
>     kernel pages, and unknown page types that cannot be recovered.
>     
>     Signed-off-by: Breno Leitao <leitao@debian.org>
> 
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 6ff80e01b91a4..a29b6688fe2d3 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -74,6 +74,8 @@ static int sysctl_memory_failure_recovery __read_mostly = 1;
>  
>  static int sysctl_enable_soft_offline __read_mostly = 1;
>  
> +static int sysctl_panic_on_unrecoverable_mf __read_mostly;
> +
>  atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
>  
>  static bool hw_memory_failure __read_mostly = false;
> @@ -155,6 +157,15 @@ static const struct ctl_table memory_failure_table[] = {
>  		.proc_handler	= proc_dointvec_minmax,
>  		.extra1		= SYSCTL_ZERO,
>  		.extra2		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "panic_on_unrecoverable_memory_failure",
> +		.data		= &sysctl_panic_on_unrecoverable_mf,
> +		.maxlen		= sizeof(sysctl_panic_on_unrecoverable_mf),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
>  	}
>  };
>  
> @@ -1281,6 +1292,16 @@ static void update_per_node_mf_stats(unsigned long pfn,
>  	++mf_stats->total;
>  }
>  
> +static bool is_unrecoverable_memory_failure(enum mf_action_page_type type,
> +					    enum mf_result result)

Thanks for your update.

> +{
> +	return sysctl_panic_on_unrecoverable_mf &&
> +	       result == MF_IGNORED &&
> +	       (type == MF_MSG_KERNEL ||
> +		type == MF_MSG_KERNEL_HIGH_ORDER ||
> +		type == MF_MSG_UNKNOWN);
> +}
> +
>  /*
>   * "Dirty/Clean" indication is not 100% accurate due to the possibility of
>   * setting PG_dirty outside page lock. See also comment above set_page_dirty().
> @@ -1298,6 +1319,9 @@ static int action_result(unsigned long pfn, enum mf_action_page_type type,
>  	pr_err("%#lx: recovery action for %s: %s\n",
>  		pfn, action_page_types[type], action_name[result]);
>  
> +	if (is_unrecoverable_memory_failure(type, result))

Would it be better to name it panic_on_unrecoverable_mf() or something like that?
This function determines whether to panic on the specified memory error.

Thanks.
.


^ permalink raw reply

* [PATCH v5] Documentation: Refactored watchdog old doc
From: Sunny Patel @ 2026-04-13  4:11 UTC (permalink / raw)
  To: linux-doc
  Cc: linux-watchdog, linux-kernel, corbet, wim, linux, rdunlap,
	Sunny Patel

Mark WDIOC_GETTEMP and WDIOS_TEMPPANIC as deprecated since
neither is implemented by the watchdog core and both are only
present in a small number of legacy drivers.

Add documentation for previously undocumented status bits
WDIOF_MAGICCLOSE and WDIOF_ALARMONLY in the options field.

Add documentation for WDIOF_PRETIMEOUT and WDIOF_SETTIMEOUT
status bits describing their respective ioctls.

Fix the following issues in existing documentation:
  - Remove version-specific reference to Linux 2.4.18 from
    the GETTIMEOUT ioctl description
  - Fix duplicate "was is" in printf format strings
  - Replace [FIXME] placeholder with proper descriptions for
    WDIOS_DISABLECARD, WDIOS_ENABLECARD and WDIOS_TEMPPANIC

Signed-off-by: Sunny Patel <nueralspacetech@gmail.com>
---

Changes in v5:
  - Fixed WDIOC_GETTIMELEFT printf statement to correctly reference 
    "timeleft" instead of "timeout".
  
Changes in v4:
  - Fixed WDIOS_DISABLECARD description: corrected inverted logic —
    the ioctl disables the hardware timer entirely rather than
    stopping pings. Clarified that userspace, not the kernel driver,
    is primarily responsible for pinging under normal operation.

 Documentation/watchdog/watchdog-api.rst | 65 +++++++++++++++++++++----
 1 file changed, 55 insertions(+), 10 deletions(-)

diff --git a/Documentation/watchdog/watchdog-api.rst b/Documentation/watchdog/watchdog-api.rst
index 78e228c272cf..736436a68f65 100644
--- a/Documentation/watchdog/watchdog-api.rst
+++ b/Documentation/watchdog/watchdog-api.rst
@@ -2,7 +2,7 @@
 The Linux Watchdog driver API
 =============================
 
-Last reviewed: 10/05/2007
+Last reviewed: 04/08/2026
 
 
 
@@ -42,7 +42,7 @@ activates as soon as /dev/watchdog is opened and will reboot unless
 the watchdog is pinged within a certain time, this time is called the
 timeout or margin.  The simplest way to ping the watchdog is to write
 some data to the device.  So a very simple watchdog daemon would look
-like this source file:  see samples/watchdog/watchdog-simple.c
+like this source file: see samples/watchdog/watchdog-simple.c
 
 A more advanced driver could for example check that a HTTP server is
 still responding before doing the write call to ping the watchdog.
@@ -106,11 +106,10 @@ the requested one due to limitation of the hardware::
 This example might actually print "The timeout was set to 60 seconds"
 if the device has a granularity of minutes for its timeout.
 
-Starting with the Linux 2.4.18 kernel, it is possible to query the
-current timeout using the GETTIMEOUT ioctl::
+It is also possible to get the current timeout with the GETTIMEOUT ioctl::
 
     ioctl(fd, WDIOC_GETTIMEOUT, &timeout);
-    printf("The timeout was is %d seconds\n", timeout);
+    printf("The timeout is %d seconds\n", timeout);
 
 Pretimeouts
 ===========
@@ -133,7 +132,7 @@ seconds.  Setting a pretimeout to zero disables it.
 There is also a get function for getting the pretimeout::
 
     ioctl(fd, WDIOC_GETPRETIMEOUT, &timeout);
-    printf("The pretimeout was is %d seconds\n", timeout);
+    printf("The pretimeout is %d seconds\n", timeout);
 
 Not all watchdog drivers will support a pretimeout.
 
@@ -145,12 +144,12 @@ before the system will reboot. The WDIOC_GETTIMELEFT is the ioctl
 that returns the number of seconds before reboot::
 
     ioctl(fd, WDIOC_GETTIMELEFT, &timeleft);
-    printf("The timeout was is %d seconds\n", timeleft);
+    printf("The timeleft is %d seconds\n", timeleft);
 
 Environmental monitoring
 ========================
 
-All watchdog drivers are required return more information about the system,
+All watchdog drivers are required to return more information about the system,
 some do temperature, fan and power level monitoring, some can tell you
 the reason for the last reboot of the system.  The GETSUPPORT ioctl is
 available to ask what the device can do::
@@ -227,12 +226,33 @@ The watchdog saw a keepalive ping since it was last queried.
 	WDIOF_SETTIMEOUT	Can set/get the timeout
 	================	=======================
 
-The watchdog can do pretimeouts.
+The watchdog supports timeout set/get via the WDIOC_SETTIMEOUT and
+WDIOC_GETTIMEOUT ioctls.
 
 	================	================================
 	WDIOF_PRETIMEOUT	Pretimeout (in seconds), get/set
 	================	================================
 
+The watchdog supports a pretimeout, a warning interrupt that fires before
+the actual reboot timeout. Use WDIOC_SETPRETIMEOUT and WDIOC_GETPRETIMEOUT
+to set/get the pretimeout.
+
+	================	================================
+	WDIOF_MAGICCLOSE	Supports magic close char
+	================	================================
+
+The driver supports the Magic Close feature. The watchdog is only disabled
+if the character 'V' is written to /dev/watchdog before the file descriptor
+is closed. Without writing 'V' before closing, the watchdog remains active
+and will trigger a reboot after the timeout expires.
+
+	================	================================
+	WDIOF_ALARMONLY		Not a reboot watchdog
+	================	================================
+
+The watchdog will not reboot the system when it expires. Instead it
+triggers a management or other external alarm. Userspace should not
+rely on a system reboot occurring.
 
 For those drivers that return any bits set in the option field, the
 GETSTATUS and GETBOOTSTATUS ioctls can be used to ask for the current
@@ -254,6 +274,11 @@ returned value is the temperature in degrees Fahrenheit::
     int temperature;
     ioctl(fd, WDIOC_GETTEMP, &temperature);
 
+.. note::
+	``WDIOC_GETTEMP`` is not implemented by the watchdog core and is
+	considered deprecated. It is only supported by a small number of
+	legacy drivers. New drivers should not implement it.
+
 Finally the SETOPTIONS ioctl can be used to control some aspects of
 the cards operation::
 
@@ -268,4 +293,24 @@ The following options are available:
 	WDIOS_TEMPPANIC		Kernel panic on temperature trip
 	=================	================================
 
-[FIXME -- better explanations]
+``WDIOS_DISABLECARD`` disables the hardware watchdog timer entirely,
+allowing a controlled system shutdown without triggering a reboot.
+Userspace is responsible for pinging the watchdog under normal
+operation; this ioctl stops the underlying hardware timer so that
+the absence of pings no longer causes a system reset.
+
+``WDIOS_ENABLECARD`` starts the watchdog timer. If the watchdog was
+previously stopped via ``WDIOS_DISABLECARD``, this will re-enable it. The
+hardware watchdog will begin counting down from the configured timeout.
+
+``WDIOS_TEMPPANIC`` enables temperature-based kernel panic. When set,
+the driver will call ``panic()`` (or ``kernel_power_off()`` on some
+drivers) if the hardware temperature sensor exceeds its threshold,
+rather than only setting the ``WDIOF_OVERHEAT`` status bit. Support
+for this option is driver-specific; not all watchdog drivers implement
+temperature monitoring.
+
+.. note::
+	``WDIOS_TEMPPANIC`` is not implemented by the watchdog core and is
+	considered deprecated. It is only present in a small number of
+	legacy drivers. New drivers should not implement it.
-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH v4 19/21] uio: replace deprecated mmap hook with mmap_prepare in uio_info
From: Shinichiro Kawasaki @ 2026-04-13  5:14 UTC (permalink / raw)
  To: Lorenzo Stoakes (Oracle)
  Cc: Andrew Morton, Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
	Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
	Alexandre Torgue, Miquel Raynal, Richard Weinberger,
	Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
	David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
	Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
	Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
	Pedro Falcato, linux-kernel@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-hyperv@vger.kernel.org,
	linux-stm32@st-md-mailman.stormreply.com,
	linux-arm-kernel@lists.infradead.org,
	linux-mtd@lists.infradead.org, linux-staging@lists.linux.dev,
	linux-scsi@vger.kernel.org, target-devel@vger.kernel.org,
	linux-afs@lists.infradead.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, Ryan Roberts
In-Reply-To: <157583e4477705b496896c7acd4ac88a937b8fa6.1774045440.git.ljs@kernel.org>

On Mar 20, 2026 / 22:39, Lorenzo Stoakes (Oracle) wrote:
> The f_op->mmap interface is deprecated, so update uio_info to use its
> successor, mmap_prepare.
> 
> Therefore, replace the uio_info->mmap hook with a new
> uio_info->mmap_prepare hook, and update its one user, target_core_user,
> to both specify this new mmap_prepare hook and also to use the new
> vm_ops->mapped() hook to continue to maintain a correct udev->kref
> refcount.
> 
> Then update uio_mmap() to utilise the mmap_prepare compatibility layer to
> invoke this callback from the uio mmap invocation.
> 
> Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>

Hello Lorenzo, since two weeks ago, I observe a failure during my kernel test
set targeting Linux for-next branch. On failure, kernel reported a WARN at
__vma_check_mmap_hook [1]. I bisected and found that this patch is the trigger.
Here I share my observations of the failure. Actions or advices for fix will be
appreciated.


The failure happens when TCMU device is set up with targetcli. When tcmu-runner
is running, the command lines below should successfully create a backstore for a
TCMU device, but it fails.

  $ sudo targetcli
  targetcli shell version 3.0.1
  Copyright 2011-2013 by Datera, Inc and others.
  For help on commands, type 'help'.
  
  /> cd /backstores/user:zbc
  /backstores/user:zbc> create name=test size=1M cfgstring=@/tmp/tmp.img
  UserBackedStorageObject creation failed.

On failure, tcmu-runner reports mmap failures:

  2026-04-13 12:23:49.271 1103 [CRIT] main:1302: Starting...
  2026-04-13 12:23:49.461 1103 [INFO] load_our_module:575: Inserted module 'target_core_user'
  2026-04-13 12:23:49.480 1103 [INFO] tcmur_register_handler:92: Handler fbo is registered
  2026-04-13 12:23:49.486 1103 [INFO] tcmur_register_handler:92: Handler zbc is registered
  2026-04-13 12:23:51.202 1103 [INFO] tcmur_register_handler:92: Handler rbd is registered
  2026-04-13 12:27:24.522 1103 [ERROR] device_open_shm:523: could not mmap /dev/uio0
  2026-04-13 12:27:24.550 1103 [ERROR] device_open_shm:523: could not mmap /dev/uio0

The failure was found with user:zbc handler. I confirmed that the failure is
recreated with fbo handler also. Then, this failure looks common for all TCMU
users.

At the failure, the kernel reported the WARN at __vma_check_mmap_hook [1]. Line
1287 in mm/util.c reported the WARN:

  1284 int __vma_check_mmap_hook(struct vm_area_struct *vma)
  1285 {
  1286         /* vm_ops->mapped is not valid if mmap() is specified. */
  1287         if (vma->vm_ops && WARN_ON_ONCE(vma->vm_ops->mapped))
  1288                 return -EINVAL;
  1289
  1290         return 0;
  1291 }
  1292 EXPORT_SYMBOL(__vma_check_mmap_hook);

When I reverted the commit from the kernel tag next-20260409, the failure
disappeared.

If other information is required for fix, please let me know. Thanks in advance.


[1] dmesg

WARNING: mm/util.c:1287 at __vma_check_mmap_hook+0x61/0x90, CPU#0: tcmu-runner/1332
Modules linked in: target_core_pscsi target_core_file target_core_iblock xfs target_core_user target_core_mod rfkill nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat ip6table_nat ip6table_mangle ip6table_raw ip6table_security iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_mangle iptable_raw iptable_security nf_tables ip6table_filter ip6_tables iptable_filter ip_tables qrtr irdma intel_rapl_msr intel_rapl_common ice intel_uncore_frequency intel_uncore_frequency_common skx_edac skx_edac_common libie_fwlog nfit sunrpc gnss idpf libnvdimm x86_pkg_temp_thermal libeth_xdp intel_powerclamp libeth ib_core spi_nor mtd coretemp kvm_intel kvm i40e iTCO_wdt irqbypass intel_pmc_bxt rapl ses vfat intel_cstate libie fat intel_uncore libie_adminq enclosure i2c_i801 spi_intel_pci i2c_smbus spi_intel lpc_ich wmi joydev mei_me ioatdma acpi_power_meter acpi_pad mei intel_pch_thermal dca fuse loop dm_multipath nfnetlink zram
 lz4hc_compress lz4_compress zstd_compress ast drm_client_lib i2c_algo_bit drm_shmem_helper drm_kms_helper nvme drm mpi3mr nvme_core scsi_transport_sas nvme_keyring nvme_auth scsi_dh_rdac scsi_dh_emc scsi_dh_alua pkcs8_key_parser i2c_dev [last unloaded: null_blk]
CPU: 0 UID: 0 PID: 1332 Comm: tcmu-runner Not tainted 7.0.0-rc6-next-20260401-kts #1 PREEMPT(lazy) 
Hardware name: Supermicro Super Server/X11SPi-TF, BIOS 3.5 05/18/2021
RIP: 0010:__vma_check_mmap_hook+0x61/0x90
Code: 00 00 00 00 fc ff df 48 8d 78 10 48 89 f9 48 c1 e9 03 80 3c 11 00 75 2a 48 83 78 10 00 75 0b 31 c0 48 83 c4 08 c3 cc cc cc cc <0f> 0b b8 ea ff ff ff eb ee 48 89 04 24 e8 6d 4c 1f 00 48 8b 04 24
RSP: 0018:ffff8881391f7488 EFLAGS: 00010282
RAX: ffffffffc2abca40 RBX: 0000000000000000 RCX: 1ffffffff855794a
RDX: dffffc0000000000 RSI: 0000000000000000 RDI: ffffffffc2abca50
RBP: ffff8881391f76a0 R08: ffffffffa10016e9 R09: ffffed102723ee44
R10: ffffed102723ee45 R11: 0000000000000000 R12: ffff8881391f78e0
R13: ffff8881391f78f0 R14: ffff88810d1ec780 R15: ffff8881391f7a78
FS:  00007f154f1a9840(0000) GS:ffff888e9b440000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f5efd40fe88 CR3: 000000010cfbf005 CR4: 00000000007726f0
PKRU: 55555554
Call Trace:
 <TASK>
 __mmap_new_vma+0x116e/0x18d0
 ? __pfx___mmap_new_vma+0x10/0x10
 ? vma_merge_new_range+0x495/0xa00
 ? __pfx_vma_merge_new_range+0x10/0x10
 ? lock_acquire+0x126/0x140
 __mmap_region+0x651/0xa00
 ? __pfx_process_measurement+0x10/0x10
 ? __pfx___mmap_region+0x10/0x10
 ? __lock_acquire+0x55d/0xbd0
 ? __lock_acquire+0x55d/0xbd0
 ? lock_is_held_type+0x9a/0x110
 ? mas_find+0xc9/0x690
 ? arch_get_unmapped_area_topdown+0x2a7/0x890
 mmap_region+0x3c2/0x4c0
 ? __pfx_mmap_region+0x10/0x10
 ? security_mmap_addr+0x54/0xd0
 ? __get_unmapped_area+0x18c/0x300
 ? __pfx_uio_mmap+0x10/0x10
 do_mmap+0xa26/0x10f0
 ? lock_acquire+0x126/0x140
 ? __pfx_do_mmap+0x10/0x10
 ? __pfx_down_write_killable+0x10/0x10
 ? __lock_acquire+0x55d/0xbd0
 vm_mmap_pgoff+0x218/0x3a0
 ? __pfx_vm_mmap_pgoff+0x10/0x10
 ? __fget_files+0x1b4/0x2f0
 ksys_mmap_pgoff+0x229/0x570
 ? clockevents_program_event+0x144/0x370
 do_syscall_64+0xf4/0x1560
 ? do_syscall_64+0x1d7/0x1560
 ? __lock_release.isra.0+0x59/0x170
 ? do_syscall_64+0x34/0x1560
 ? lockdep_hardirqs_on_prepare.part.0+0x9b/0x140
 ? do_syscall_64+0x34/0x1560
 ? trace_hardirqs_on+0x19/0x1a0
 ? do_syscall_64+0xab/0x1560
 ? clear_bhb_loop+0x30/0x80
 entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f154f5728dc
Code: 1e fa 41 f7 c1 ff 0f 00 00 75 33 55 48 89 e5 41 54 41 89 cc 53 48 89 fb 48 85 ff 74 41 45 89 e2 48 89 df b8 09 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7c 5b 41 5c 5d c3 0f 1f 80 00 00 00 00 48 8b
RSP: 002b:00007ffeb6ac04f0 EFLAGS: 00000246 ORIG_RAX: 0000000000000009
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f154f5728dc
RDX: 0000000000000003 RSI: 0000000040800000 RDI: 0000000000000000
RBP: 00007ffeb6ac0500 R08: 000000000000000c R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000001
R13: 000000001ddac980 R14: 00007f154fa629c0 R15: 00007f154fa62940
 </TASK>
irq event stamp: 62665
hardirqs last  enabled at (62679): [<ffffffff9e1cd23e>] __up_console_sem+0x5e/0x70
hardirqs last disabled at (62698): [<ffffffff9e1cd223>] __up_console_sem+0x43/0x70
softirqs last  enabled at (62692): [<ffffffff9dfc5d01>] handle_softirqs+0x5c1/0x8b0
softirqs last disabled at (62721): [<ffffffff9dfc6152>] __irq_exit_rcu+0x152/0x280
---[ end trace 0000000000000000 ]---
scsi host74: TCM_Loopback

^ permalink raw reply

* Re: [PATCH v4 19/21] uio: replace deprecated mmap hook with mmap_prepare in uio_info
From: Lorenzo Stoakes @ 2026-04-13  5:37 UTC (permalink / raw)
  To: Shinichiro Kawasaki
  Cc: Andrew Morton, Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
	Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
	Alexandre Torgue, Miquel Raynal, Richard Weinberger,
	Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
	David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
	Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
	Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
	Pedro Falcato, linux-kernel@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-hyperv@vger.kernel.org,
	linux-stm32@st-md-mailman.stormreply.com,
	linux-arm-kernel@lists.infradead.org,
	linux-mtd@lists.infradead.org, linux-staging@lists.linux.dev,
	linux-scsi@vger.kernel.org, target-devel@vger.kernel.org,
	linux-afs@lists.infradead.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, Ryan Roberts
In-Reply-To: <adx2ws5z0NMIe5Yj@shinmob>

On Mon, Apr 13, 2026 at 05:14:08AM +0000, Shinichiro Kawasaki wrote:
> On Mar 20, 2026 / 22:39, Lorenzo Stoakes (Oracle) wrote:
> > The f_op->mmap interface is deprecated, so update uio_info to use its
> > successor, mmap_prepare.
> >
> > Therefore, replace the uio_info->mmap hook with a new
> > uio_info->mmap_prepare hook, and update its one user, target_core_user,
> > to both specify this new mmap_prepare hook and also to use the new
> > vm_ops->mapped() hook to continue to maintain a correct udev->kref
> > refcount.
> >
> > Then update uio_mmap() to utilise the mmap_prepare compatibility layer to
> > invoke this callback from the uio mmap invocation.
> >
> > Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
>
> Hello Lorenzo, since two weeks ago, I observe a failure during my kernel test
> set targeting Linux for-next branch. On failure, kernel reported a WARN at
> __vma_check_mmap_hook [1]. I bisected and found that this patch is the trigger.
> Here I share my observations of the failure. Actions or advices for fix will be
> appreciated.

Ugh yeah thanks, this actually needs to account for use of compatibility layer,
so probably we shouldn't even assert this as that isn't easily detectable.

I'll send a hotfix for this that can be bundled up with 7.1 patches.

Cheers, Lorenzo

^ permalink raw reply

* Re: [PATCH] cpufreq: CPPC: add autonomous mode boot parameter support
From: Viresh Kumar @ 2026-04-13  5:51 UTC (permalink / raw)
  To: Pierre Gondois
  Cc: Sumit Gupta, linux-tegra, linux-kernel, linux-doc, zhenglifeng1,
	treding, jonathanh, vsethi, ionela.voinescu, ksitaraman, sanjayc,
	zhanjie9, corbet, mochs, skhan, bbasu, rdunlap, linux-pm,
	mario.limonciello, rafael
In-Reply-To: <208360b1-36a5-419d-80f4-431914407f61@arm.com>

On 10-04-26, 15:47, Pierre Gondois wrote:
> I need to ping Viresh to check if this is still relevant.

I think it's okay to clear the min/max state in the kernel once and for all if
you think it is not done nicely. As discussed earlier, try that in a fresh
series which only does that part.

-- 
viresh

^ permalink raw reply

* Re: [PATCH mm-unstable v15 06/13] mm/khugepaged: skip collapsing mTHP to smaller orders
From: David Hildenbrand (Arm) @ 2026-04-13  7:37 UTC (permalink / raw)
  To: Nico Pache
  Cc: linux-doc, linux-kernel, linux-mm, linux-trace-kernel, aarcange,
	akpm, anshuman.khandual, apopple, baohua, baolin.wang, byungchul,
	catalin.marinas, cl, corbet, dave.hansen, dev.jain, gourry,
	hannes, hughd, jack, jackmanb, jannh, jglisse, joshua.hahnjy, kas,
	lance.yang, Liam.Howlett, lorenzo.stoakes, mathieu.desnoyers,
	matthew.brost, mhiramat, mhocko, peterx, pfalcato, rakie.kim,
	raquini, rdunlap, richard.weiyang, rientjes, rostedt, rppt,
	ryan.roberts, shivankg, sunnanyong, surenb, thomas.hellstrom,
	tiwai, usamaarif642, vbabka, vishal.moola, wangkefeng.wang, will,
	willy, yang, ying.huang, ziy, zokeefe
In-Reply-To: <CAA1CXcDnz_7+16sDVbGJ2ZZPWxs7ta_Z0YU6x1dUe7yiSJ3OKg@mail.gmail.com>

On 4/13/26 03:38, Nico Pache wrote:
> On Thu, Mar 12, 2026 at 3:00 PM David Hildenbrand (Arm)
> <david@kernel.org> wrote:
>>
>> On 2/26/26 04:24, Nico Pache wrote:
>>> khugepaged may try to collapse a mTHP to a smaller mTHP, resulting in
>>> some pages being unmapped. Skip these cases until we have a way to check
>>> if its ok to collapse to a smaller mTHP size (like in the case of a
>>> partially mapped folio).
>>>
>>> This patch is inspired by Dev Jain's work on khugepaged mTHP support [1].
>>>
>>> [1] https://lore.kernel.org/lkml/20241216165105.56185-11-dev.jain@arm.com/
>>>
>>> Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>>> Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>>> Co-developed-by: Dev Jain <dev.jain@arm.com>
>>> Signed-off-by: Dev Jain <dev.jain@arm.com>
>>> Signed-off-by: Nico Pache <npache@redhat.com>
>>> ---
>>>  mm/khugepaged.c | 8 ++++++++
>>>  1 file changed, 8 insertions(+)
>>>
>>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>>> index fb3ba8fe5a6c..c739f26dd61e 100644
>>> --- a/mm/khugepaged.c
>>> +++ b/mm/khugepaged.c
>>> @@ -638,6 +638,14 @@ static enum scan_result __collapse_huge_page_isolate(struct vm_area_struct *vma,
>>>                               goto out;
>>>                       }
>>>               }
>>> +             /*
>>> +              * TODO: In some cases of partially-mapped folios, we'd actually
>>> +              * want to collapse.
>>> +              */
>>> +             if (!is_pmd_order(order) && folio_order(folio) >= order) {
>>> +                     result = SCAN_PTE_MAPPED_HUGEPAGE;
>>> +                     goto out;
>>> +             }
>>>
>>>               if (folio_test_large(folio)) {
>>>                       struct folio *f;
>>
>> Why aren't we doing the same in hpage_collapse_scan_pmd() ?
> 
> We can't do this in the scan phase because we are not yet aware of the
> order we want to collapse to.

Yes, realized that myself later. It's confusing, try documenting that in
the patch description.

-- 
Cheers,

David

^ permalink raw reply

* [PATCH v2 00/12] Dynamic Housekeeping Management (DHM) via CPUSets
From: Qiliang Yuan @ 2026-04-13  7:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, Paul E. McKenney, Frederic Weisbecker,
	Neeraj Upadhyay, Joel Fernandes, Josh Triplett, Boqun Feng,
	Uladzislau Rezki, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Anna-Maria Behnsen, Ingo Molnar, Thomas Gleixner, Tejun Heo,
	Andrew Morton, Vlastimil Babka, Suren Baghdasaryan, Michal Hocko,
	Brendan Jackman, Johannes Weiner, Zi Yan, Waiman Long,
	Chen Ridong, Michal Koutný, Jonathan Corbet, Shuah Khan,
	Shuah Khan
  Cc: linux-kernel, rcu, linux-mm, cgroups, linux-doc, linux-kselftest,
	Qiliang Yuan

This series introduces Dynamic Housekeeping Management (DHM) to the
Linux kernel.

Previously known as the DHEI (Dynamic Housekeeping Environment Interface)
patchset (RFC and v1), this series has been fundamentally refactored in
response to upstream feedback. The custom sysfs interface has been entirely
dropped. Instead, DHM is now natively integrated into the cgroup v2
cpuset controller.

By exposing `cpuset.housekeeping.cpus` on the root cgroup, system
administrators and workload orchestrators (like Kubernetes) can
dynamically update the kernel's global housekeeping masks at runtime,
without requiring a node reboot.

This version provides dynamic reconfiguration support for the following
subsystems:
- RCU (NOCB offloading)
- Tick/NOHZ (Full dynticks)
- Global Workqueues and Timers
- Managed Interrupts (genirq)
- Hardlockup Detectors (Watchdog)
- Scheduler Domains (Isolation)
- Memory Management (vmstat/lru_add_drain)
- Kthreads and Softirqs (Affinity)

Many thanks to the maintainers for the valuable guidance that led to this
significantly improved and upstream-aligned architecture.

To: Ingo Molnar <mingo@redhat.com>
To: Peter Zijlstra <peterz@infradead.org>
To: Juri Lelli <juri.lelli@redhat.com>
To: Vincent Guittot <vincent.guittot@linaro.org>
To: Dietmar Eggemann <dietmar.eggemann@arm.com>
To: Steven Rostedt <rostedt@goodmis.org>
To: Ben Segall <bsegall@google.com>
To: Mel Gorman <mgorman@suse.de>
To: Valentin Schneider <vschneid@redhat.com>
To: Paul E. McKenney <paulmck@kernel.org>
To: Frederic Weisbecker <frederic@kernel.org>
To: Neeraj Upadhyay <neeraj.upadhyay@kernel.org>
To: Joel Fernandes <joelagnelf@nvidia.com>
To: Josh Triplett <josh@joshtriplett.org>
To: Boqun Feng <boqun@kernel.org>
To: Uladzislau Rezki <urezki@gmail.com>
To: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
To: Lai Jiangshan <jiangshanlai@gmail.com>
To: Zqiang <qiang.zhang@linux.dev>
To: Anna-Maria Behnsen <anna-maria@linutronix.de>
To: Ingo Molnar <mingo@kernel.org>
To: Thomas Gleixner <tglx@kernel.org>
To: Tejun Heo <tj@kernel.org>
To: Andrew Morton <akpm@linux-foundation.org>
To: Vlastimil Babka <vbabka@kernel.org>
To: Suren Baghdasaryan <surenb@google.com>
To: Michal Hocko <mhocko@suse.com>
To: Brendan Jackman <jackmanb@google.com>
To: Johannes Weiner <hannes@cmpxchg.org>
To: Zi Yan <ziy@nvidia.com>
To: Waiman Long <longman@redhat.com>
To: Chen Ridong <chenridong@huaweicloud.com>
To: Michal Koutný <mkoutny@suse.com>
To: Jonathan Corbet <corbet@lwn.net>
To: Shuah Khan <skhan@linuxfoundation.org>
To: Shuah Khan <shuah@kernel.org>
Cc: linux-kernel@vger.kernel.org
Cc: rcu@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: cgroups@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-kselftest@vger.kernel.org

Changes in v2:
- Rebranded series from DHEI to DHM (Dynamic Housekeeping Management).
- Entirely dropped custom sysfs interface.
- Integrated housekeeping control into cgroup v2 cpuset controller
  at the root level.
- Added SMT-aware pipeline logic (cpuset.housekeeping.smt_aware) to
  prevent splitting SMT siblings.
- Added comprehensive documentation and cgroup functional selftests for
  the DHM APIs.
- Refactored the internal mask transition logic to use RCU-safe
  handover.
- Separated patch series into 4 logical phases for review.

v1 Link: https://lore.kernel.org/all/20260325-dhei-v12-final-v1-0-919cca23cadf@gmail.com

Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
---
Qiliang Yuan (12):
      sched/isolation: Separate housekeeping types in enum hk_type
      sched/isolation: Introduce housekeeping notifier infrastructure
      rcu: Support runtime NOCB initialization and dynamic offloading
      tick/nohz: Transition to dynamic full dynticks state management
      genirq: Support dynamic migration for managed interrupts
      watchdog: Allow runtime toggle of lockup detector affinity
      sched/core: Dynamically update scheduler domain housekeeping mask
      workqueue, mm: Support dynamic housekeeping mask updates
      cgroup/cpuset: Introduce CPUSet-driven dynamic housekeeping (DHM)
      cgroup/cpuset: Implement SMT-aware grouping and safety guards
      Documentation: cgroup-v2: Document dynamic housekeeping (DHM)
      selftests: cgroup: Add functional tests for dynamic housekeeping

 Documentation/admin-guide/cgroup-v2.rst      |  24 +++++
 include/linux/sched/isolation.h              |  51 ++++++++---
 kernel/cgroup/cpuset-internal.h              |   2 +
 kernel/cgroup/cpuset.c                       |  73 +++++++++++++++
 kernel/irq/manage.c                          |  49 ++++++++++
 kernel/rcu/rcu.h                             |   4 +
 kernel/rcu/tree.c                            |  75 ++++++++++++++++
 kernel/rcu/tree.h                            |   2 +-
 kernel/rcu/tree_nocb.h                       |  31 ++++---
 kernel/sched/core.c                          |  23 +++++
 kernel/sched/isolation.c                     |  74 ++++++++++++++-
 kernel/time/tick-sched.c                     | 130 +++++++++++++++++++++------
 kernel/watchdog.c                            |  26 ++++++
 kernel/workqueue.c                           |  42 +++++++++
 mm/compaction.c                              |  27 ++++++
 tools/testing/selftests/cgroup/test_cpuset.c |  36 ++++++++
 16 files changed, 620 insertions(+), 49 deletions(-)
---
base-commit: bfe62a454542cfad3379f6ef5680b125f41e20f4
change-id: 20260408-wujing-dhm-8f43e2d49cd8

Best regards,
-- 
Qiliang Yuan <realwujing@gmail.com>


^ permalink raw reply

* [PATCH v2 01/12] sched/isolation: Separate housekeeping types in enum hk_type
From: Qiliang Yuan @ 2026-04-13  7:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, Paul E. McKenney, Frederic Weisbecker,
	Neeraj Upadhyay, Joel Fernandes, Josh Triplett, Boqun Feng,
	Uladzislau Rezki, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Anna-Maria Behnsen, Ingo Molnar, Thomas Gleixner, Tejun Heo,
	Andrew Morton, Vlastimil Babka, Suren Baghdasaryan, Michal Hocko,
	Brendan Jackman, Johannes Weiner, Zi Yan, Waiman Long,
	Chen Ridong, Michal Koutný, Jonathan Corbet, Shuah Khan,
	Shuah Khan
  Cc: linux-kernel, rcu, linux-mm, cgroups, linux-doc, linux-kselftest,
	Qiliang Yuan
In-Reply-To: <20260413-wujing-dhm-v2-0-06df21caba5d@gmail.com>

Most kernel noise types (TICK, TIMER, RCU, etc.) are currently aliased
to a single HK_TYPE_KERNEL_NOISE enum value. This prevents fine-grained
runtime isolation control as all masks are forced to be identical.

Un-alias service-specific housekeeping types in enum hk_type. This
separation provides the necessary granularity for DHM subsystems to
subscribe to and maintain independent affinity masks.

Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
---
 include/linux/sched/isolation.h | 20 ++++++++------------
 kernel/sched/isolation.c        | 10 +++++++++-
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index dc3975ff1b2e1..b9a041247565c 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -17,21 +17,17 @@ enum hk_type {
 	/* Inverse of boot-time isolcpus=managed_irq argument */
 	HK_TYPE_MANAGED_IRQ,
 	/* Inverse of boot-time nohz_full= or isolcpus=nohz arguments */
-	HK_TYPE_KERNEL_NOISE,
+	HK_TYPE_TICK,
+	HK_TYPE_TIMER,
+	HK_TYPE_RCU,
+	HK_TYPE_MISC,
+	HK_TYPE_WQ,
+	HK_TYPE_KTHREAD,
 	HK_TYPE_MAX,
-
-	/*
-	 * The following housekeeping types are only set by the nohz_full
-	 * boot commandline option. So they can share the same value.
-	 */
-	HK_TYPE_TICK    = HK_TYPE_KERNEL_NOISE,
-	HK_TYPE_TIMER   = HK_TYPE_KERNEL_NOISE,
-	HK_TYPE_RCU     = HK_TYPE_KERNEL_NOISE,
-	HK_TYPE_MISC    = HK_TYPE_KERNEL_NOISE,
-	HK_TYPE_WQ      = HK_TYPE_KERNEL_NOISE,
-	HK_TYPE_KTHREAD = HK_TYPE_KERNEL_NOISE
 };
 
+#define HK_TYPE_KERNEL_NOISE HK_TYPE_TICK
+
 #ifdef CONFIG_CPU_ISOLATION
 DECLARE_STATIC_KEY_FALSE(housekeeping_overridden);
 extern int housekeeping_any_cpu(enum hk_type type);
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index ef152d401fe20..e05ed5118e651 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -15,9 +15,17 @@ enum hk_flags {
 	HK_FLAG_DOMAIN_BOOT	= BIT(HK_TYPE_DOMAIN_BOOT),
 	HK_FLAG_DOMAIN		= BIT(HK_TYPE_DOMAIN),
 	HK_FLAG_MANAGED_IRQ	= BIT(HK_TYPE_MANAGED_IRQ),
-	HK_FLAG_KERNEL_NOISE	= BIT(HK_TYPE_KERNEL_NOISE),
+	HK_FLAG_TICK		= BIT(HK_TYPE_TICK),
+	HK_FLAG_TIMER		= BIT(HK_TYPE_TIMER),
+	HK_FLAG_RCU		= BIT(HK_TYPE_RCU),
+	HK_FLAG_MISC		= BIT(HK_TYPE_MISC),
+	HK_FLAG_WQ		= BIT(HK_TYPE_WQ),
+	HK_FLAG_KTHREAD		= BIT(HK_TYPE_KTHREAD),
 };
 
+#define HK_FLAG_KERNEL_NOISE (HK_FLAG_TICK | HK_FLAG_TIMER | HK_FLAG_RCU | \
+			      HK_FLAG_MISC | HK_FLAG_WQ | HK_FLAG_KTHREAD)
+
 DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
 EXPORT_SYMBOL_GPL(housekeeping_overridden);
 

-- 
2.43.0


^ permalink raw reply related

* [PATCH v2 02/12] sched/isolation: Introduce housekeeping notifier infrastructure
From: Qiliang Yuan @ 2026-04-13  7:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, Paul E. McKenney, Frederic Weisbecker,
	Neeraj Upadhyay, Joel Fernandes, Josh Triplett, Boqun Feng,
	Uladzislau Rezki, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Anna-Maria Behnsen, Ingo Molnar, Thomas Gleixner, Tejun Heo,
	Andrew Morton, Vlastimil Babka, Suren Baghdasaryan, Michal Hocko,
	Brendan Jackman, Johannes Weiner, Zi Yan, Waiman Long,
	Chen Ridong, Michal Koutný, Jonathan Corbet, Shuah Khan,
	Shuah Khan
  Cc: linux-kernel, rcu, linux-mm, cgroups, linux-doc, linux-kselftest,
	Qiliang Yuan
In-Reply-To: <20260413-wujing-dhm-v2-0-06df21caba5d@gmail.com>

Subsystems currently rely on static housekeeping masks determined at
boot. Supporting runtime reconfiguration (DHM v2) requires a mechanism
to broadcast mask changes to affected kernel components.

Implement a blocking notifier chain for housekeeping mask updates. This
infrastructure enables subsystems like genirq, workqueues, and RCU to
react dynamically to isolation changes triggered by cpusets.

Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
---
 include/linux/sched/isolation.h | 21 +++++++++++++++++++++
 kernel/sched/isolation.c        | 26 ++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index b9a041247565c..aea1dbc4d7486 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -4,6 +4,7 @@
 #include <linux/cpumask.h>
 #include <linux/init.h>
 #include <linux/tick.h>
+#include <linux/notifier.h>
 
 enum hk_type {
 	/* Inverse of boot-time isolcpus= argument */
@@ -28,6 +29,13 @@ enum hk_type {
 
 #define HK_TYPE_KERNEL_NOISE HK_TYPE_TICK
 
+struct housekeeping_update {
+	enum hk_type type;
+	const struct cpumask *new_mask;
+};
+
+#define HK_UPDATE_MASK	0x01
+
 #ifdef CONFIG_CPU_ISOLATION
 DECLARE_STATIC_KEY_FALSE(housekeeping_overridden);
 extern int housekeeping_any_cpu(enum hk_type type);
@@ -38,6 +46,9 @@ extern bool housekeeping_test_cpu(int cpu, enum hk_type type);
 extern int housekeeping_update(struct cpumask *isol_mask);
 extern void __init housekeeping_init(void);
 
+extern int housekeeping_register_notifier(struct notifier_block *nb);
+extern int housekeeping_unregister_notifier(struct notifier_block *nb);
+
 #else
 
 static inline int housekeeping_any_cpu(enum hk_type type)
@@ -65,6 +76,16 @@ static inline bool housekeeping_test_cpu(int cpu, enum hk_type type)
 
 static inline int housekeeping_update(struct cpumask *isol_mask) { return 0; }
 static inline void housekeeping_init(void) { }
+
+static inline int housekeeping_register_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+
+static inline int housekeeping_unregister_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
 #endif /* CONFIG_CPU_ISOLATION */
 
 static inline bool housekeeping_cpu(int cpu, enum hk_type type)
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index e05ed5118e651..0462b41807161 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -10,6 +10,7 @@
 #include <linux/sched/isolation.h>
 #include <linux/pci.h>
 #include "sched.h"
+#include <linux/notifier.h>
 
 enum hk_flags {
 	HK_FLAG_DOMAIN_BOOT	= BIT(HK_TYPE_DOMAIN_BOOT),
@@ -26,6 +27,8 @@ enum hk_flags {
 #define HK_FLAG_KERNEL_NOISE (HK_FLAG_TICK | HK_FLAG_TIMER | HK_FLAG_RCU | \
 			      HK_FLAG_MISC | HK_FLAG_WQ | HK_FLAG_KTHREAD)
 
+static BLOCKING_NOTIFIER_HEAD(housekeeping_notifier_list);
+
 DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
 EXPORT_SYMBOL_GPL(housekeeping_overridden);
 
@@ -170,6 +173,29 @@ int housekeeping_update(struct cpumask *isol_mask)
 	return 0;
 }
 
+int housekeeping_register_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&housekeeping_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(housekeeping_register_notifier);
+
+int housekeeping_unregister_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&housekeeping_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(housekeeping_unregister_notifier);
+
+int housekeeping_update_notify(enum hk_type type, const struct cpumask *new_mask)
+{
+	struct housekeeping_update update = {
+		.type = type,
+		.new_mask = new_mask,
+	};
+
+	return blocking_notifier_call_chain(&housekeeping_notifier_list, HK_UPDATE_MASK, &update);
+}
+EXPORT_SYMBOL_GPL(housekeeping_update_notify);
+
 void __init housekeeping_init(void)
 {
 	enum hk_type type;

-- 
2.43.0


^ permalink raw reply related

* [PATCH v2 03/12] rcu: Support runtime NOCB initialization and dynamic offloading
From: Qiliang Yuan @ 2026-04-13  7:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, Paul E. McKenney, Frederic Weisbecker,
	Neeraj Upadhyay, Joel Fernandes, Josh Triplett, Boqun Feng,
	Uladzislau Rezki, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Anna-Maria Behnsen, Ingo Molnar, Thomas Gleixner, Tejun Heo,
	Andrew Morton, Vlastimil Babka, Suren Baghdasaryan, Michal Hocko,
	Brendan Jackman, Johannes Weiner, Zi Yan, Waiman Long,
	Chen Ridong, Michal Koutný, Jonathan Corbet, Shuah Khan,
	Shuah Khan
  Cc: linux-kernel, rcu, linux-mm, cgroups, linux-doc, linux-kselftest,
	Qiliang Yuan
In-Reply-To: <20260413-wujing-dhm-v2-0-06df21caba5d@gmail.com>

Context:
The RCU no-callbacks (NOCB) infrastructure traditionally requires
boot-time parameters (e.g., rcu_nocbs) to allocate masks and spawn
management kthreads (rcuog/rcuo). This prevents systems from activating
offloading on-demand without a reboot.

Problem:
Dynamic Housekeeping Management requires CPUs to transition to
NOCB mode at runtime when they are newly isolated. Without boot-time
setup, the NOCB masks are unallocated, and critical kthreads are missing,
preventing effective tick suppression and isolation.

Solution:
Refactor RCU initialization to support dynamic on-demand setup.
- Introduce rcu_init_nocb_dynamic() to allocate masks and organize
  kthreads if the system wasn't initially configured for NOCB.
- Introduce rcu_housekeeping_reconfigure() to iterate over CPUs and
  perform safe offload/deoffload transitions via hotplug sequences
  (cpu_down -> offload -> cpu_up) when a housekeeping cpuset triggers
  a notifier event.
- Remove __init from rcu_organize_nocb_kthreads to allow runtime
  reconfiguration of the callback management hierarchy.

This enables a true "Zero-Conf" isolation experience where any CPU
can be fully isolated at runtime regardless of boot parameters.

Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
---
 kernel/rcu/rcu.h       |  4 +++
 kernel/rcu/tree.c      | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/rcu/tree.h      |  2 +-
 kernel/rcu/tree_nocb.h | 31 +++++++++++++--------
 4 files changed, 100 insertions(+), 12 deletions(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 9b10b57b79ada..282874443c96b 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -663,8 +663,12 @@ unsigned long srcu_batches_completed(struct srcu_struct *sp);
 #endif // #else // #ifdef CONFIG_TINY_SRCU
 
 #ifdef CONFIG_RCU_NOCB_CPU
+void rcu_init_nocb_dynamic(void);
+void rcu_spawn_cpu_nocb_kthread(int cpu);
 void rcu_bind_current_to_nocb(void);
 #else
+static inline void rcu_init_nocb_dynamic(void) { }
+static inline void rcu_spawn_cpu_nocb_kthread(int cpu) { }
 static inline void rcu_bind_current_to_nocb(void) { }
 #endif
 
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 55df6d37145e8..84c8388cf89a1 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4928,4 +4928,79 @@ void __init rcu_init(void)
 #include "tree_stall.h"
 #include "tree_exp.h"
 #include "tree_nocb.h"
+
+#ifdef CONFIG_SMP
+static int rcu_housekeeping_reconfigure(struct notifier_block *nb,
+					unsigned long action, void *data)
+{
+	struct housekeeping_update *upd = data;
+	struct task_struct *t;
+	int cpu;
+
+	if (action != HK_UPDATE_MASK || upd->type != HK_TYPE_RCU)
+		return NOTIFY_OK;
+
+	rcu_init_nocb_dynamic();
+
+	for_each_possible_cpu(cpu) {
+		struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+		bool isolated = !cpumask_test_cpu(cpu, upd->new_mask);
+		bool offloaded = rcu_rdp_is_offloaded(rdp);
+
+		if (isolated && !offloaded) {
+			/* Transition to NOCB */
+			pr_info("rcu: CPU %d transitioning to NOCB mode\n", cpu);
+			if (cpu_online(cpu)) {
+				remove_cpu(cpu);
+				rcu_spawn_cpu_nocb_kthread(cpu);
+				rcu_nocb_cpu_offload(cpu);
+				add_cpu(cpu);
+			} else {
+				rcu_spawn_cpu_nocb_kthread(cpu);
+				rcu_nocb_cpu_offload(cpu);
+			}
+		} else if (!isolated && offloaded) {
+			/* Transition to CB */
+			pr_info("rcu: CPU %d transitioning to CB mode\n", cpu);
+			if (cpu_online(cpu)) {
+				remove_cpu(cpu);
+				rcu_nocb_cpu_deoffload(cpu);
+				add_cpu(cpu);
+			} else {
+				rcu_nocb_cpu_deoffload(cpu);
+			}
+		}
+	}
+
+	t = READ_ONCE(rcu_state.gp_kthread);
+	if (t)
+		housekeeping_affine(t, HK_TYPE_RCU);
+
+#ifdef CONFIG_TASKS_RCU
+	t = get_rcu_tasks_gp_kthread();
+	if (t)
+		housekeeping_affine(t, HK_TYPE_RCU);
+#endif
+
+#ifdef CONFIG_TASKS_RUDE_RCU
+	t = get_rcu_tasks_rude_gp_kthread();
+	if (t)
+		housekeeping_affine(t, HK_TYPE_RCU);
+#endif
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block rcu_housekeeping_nb = {
+	.notifier_call = rcu_housekeeping_reconfigure,
+};
+
+static int __init rcu_init_housekeeping_notifier(void)
+{
+	housekeeping_register_notifier(&rcu_housekeeping_nb);
+	return 0;
+}
+late_initcall(rcu_init_housekeeping_notifier);
+#endif
+
 #include "tree_plugin.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 7dfc57e9adb18..f3d31918ea322 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -517,7 +517,7 @@ static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
 				       unsigned long flags);
 static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp);
 #ifdef CONFIG_RCU_NOCB_CPU
-static void __init rcu_organize_nocb_kthreads(void);
+static void rcu_organize_nocb_kthreads(void);
 
 /*
  * Disable IRQs before checking offloaded state so that local
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index b3337c7231ccb..36f6c9be937aa 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1259,6 +1259,22 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 }
 #endif // #ifdef CONFIG_RCU_LAZY
 
+void rcu_init_nocb_dynamic(void)
+{
+	if (rcu_state.nocb_is_setup)
+		return;
+
+	if (!cpumask_available(rcu_nocb_mask)) {
+		if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
+			pr_info("rcu_nocb_mask allocation failed, dynamic offloading disabled.\n");
+			return;
+		}
+	}
+
+	rcu_state.nocb_is_setup = true;
+	rcu_organize_nocb_kthreads();
+}
+
 void __init rcu_init_nohz(void)
 {
 	int cpu;
@@ -1276,15 +1292,8 @@ void __init rcu_init_nohz(void)
 		cpumask = cpu_possible_mask;
 
 	if (cpumask) {
-		if (!cpumask_available(rcu_nocb_mask)) {
-			if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
-				pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
-				return;
-			}
-		}
-
+		rcu_init_nocb_dynamic();
 		cpumask_or(rcu_nocb_mask, rcu_nocb_mask, cpumask);
-		rcu_state.nocb_is_setup = true;
 	}
 
 	if (!rcu_state.nocb_is_setup)
@@ -1344,7 +1353,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
  * rcuo CB kthread, spawn it.  Additionally, if the rcuo GP kthread
  * for this CPU's group has not yet been created, spawn it as well.
  */
-static void rcu_spawn_cpu_nocb_kthread(int cpu)
+void rcu_spawn_cpu_nocb_kthread(int cpu)
 {
 	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 	struct rcu_data *rdp_gp;
@@ -1416,7 +1425,7 @@ module_param(rcu_nocb_gp_stride, int, 0444);
 /*
  * Initialize GP-CB relationships for all no-CBs CPU.
  */
-static void __init rcu_organize_nocb_kthreads(void)
+static void rcu_organize_nocb_kthreads(void)
 {
 	int cpu;
 	bool firsttime = true;
@@ -1668,7 +1677,7 @@ static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
 	return false;
 }
 
-static void rcu_spawn_cpu_nocb_kthread(int cpu)
+void rcu_spawn_cpu_nocb_kthread(int cpu)
 {
 }
 

-- 
2.43.0


^ permalink raw reply related

* [PATCH v2 04/12] tick/nohz: Transition to dynamic full dynticks state management
From: Qiliang Yuan @ 2026-04-13  7:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, Paul E. McKenney, Frederic Weisbecker,
	Neeraj Upadhyay, Joel Fernandes, Josh Triplett, Boqun Feng,
	Uladzislau Rezki, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Anna-Maria Behnsen, Ingo Molnar, Thomas Gleixner, Tejun Heo,
	Andrew Morton, Vlastimil Babka, Suren Baghdasaryan, Michal Hocko,
	Brendan Jackman, Johannes Weiner, Zi Yan, Waiman Long,
	Chen Ridong, Michal Koutný, Jonathan Corbet, Shuah Khan,
	Shuah Khan
  Cc: linux-kernel, rcu, linux-mm, cgroups, linux-doc, linux-kselftest,
	Qiliang Yuan
In-Reply-To: <20260413-wujing-dhm-v2-0-06df21caba5d@gmail.com>

Context:
Full dynticks (NOHZ_FULL) is typically a static configuration determined
at boot time. DHM extends this to support runtime activation.

Problem:
Switching to NOHZ_FULL at runtime requires careful synchronization
of context tracking and housekeeping states. Re-invoking setup logic
multiple times could lead to inconsistencies or warnings, and RCU
dependency checks often prevented tick suppression in Zero-Conf setups.

Solution:
- Replace the static tick_nohz_full_enabled() checks with a dynamic
  tick_nohz_full_running state variable.
- Refactor tick_nohz_full_setup to be safe for runtime invocation,
  adding guards against re-initialization and ensuring IRQ work
  interrupt support.
- Implement boot-time pre-activation of context tracking (shadow
  init) for all possible CPUs to avoid instruction flow issues during
  dynamic transitions.
- Hook into housekeeping_notifier_list to update NO_HZ states dynamically.

This provides the core state machine for reliable, on-demand tick
suppression and high-performance isolation.

Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
---
 kernel/time/tick-sched.c | 130 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 105 insertions(+), 25 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f7907fadd63f2..23d69d7d44538 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -27,6 +27,7 @@
 #include <linux/posix-timers.h>
 #include <linux/context_tracking.h>
 #include <linux/mm.h>
+#include <linux/sched/isolation.h>
 
 #include <asm/irq_regs.h>
 
@@ -624,13 +625,25 @@ void __tick_nohz_task_switch(void)
 /* Get the boot-time nohz CPU list from the kernel parameters. */
 void __init tick_nohz_full_setup(cpumask_var_t cpumask)
 {
-	alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
+	if (!tick_nohz_full_mask) {
+		if (!slab_is_available())
+			alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
+		else
+			zalloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL);
+	}
 	cpumask_copy(tick_nohz_full_mask, cpumask);
 	tick_nohz_full_running = true;
 }
 
 bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
 {
+	/*
+	 * Allow all CPUs to go down during shutdown/reboot to avoid
+	 * interfering with the final power-off sequence.
+	 */
+	if (system_state > SYSTEM_RUNNING)
+		return true;
+
 	/*
 	 * The 'tick_do_timer_cpu' CPU handles housekeeping duty (unbound
 	 * timers, workqueues, timekeeping, ...) on behalf of full dynticks
@@ -646,45 +659,112 @@ static int tick_nohz_cpu_down(unsigned int cpu)
 	return tick_nohz_cpu_hotpluggable(cpu) ? 0 : -EBUSY;
 }
 
+static int tick_nohz_housekeeping_reconfigure(struct notifier_block *nb,
+					     unsigned long action, void *data)
+{
+	struct housekeeping_update *upd = data;
+	int cpu;
+
+	if (action == HK_UPDATE_MASK && upd->type == HK_TYPE_TICK) {
+		cpumask_var_t non_housekeeping_mask;
+
+		if (!alloc_cpumask_var(&non_housekeeping_mask, GFP_KERNEL))
+			return NOTIFY_BAD;
+
+		cpumask_andnot(non_housekeeping_mask, cpu_possible_mask, upd->new_mask);
+
+		if (!tick_nohz_full_mask) {
+			if (!zalloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
+				free_cpumask_var(non_housekeeping_mask);
+				return NOTIFY_BAD;
+			}
+		}
+
+		/* Kick all CPUs to re-evaluate tick dependency before change */
+		for_each_online_cpu(cpu)
+			tick_nohz_full_kick_cpu(cpu);
+
+		cpumask_copy(tick_nohz_full_mask, non_housekeeping_mask);
+		tick_nohz_full_running = !cpumask_empty(tick_nohz_full_mask);
+
+		/*
+		 * If nohz_full is running, the timer duty must be on a housekeeper.
+		 * If the current timer CPU is not a housekeeper, or no duty is assigned,
+		 * pick the first housekeeper and assign it.
+		 */
+		if (tick_nohz_full_running) {
+			int timer_cpu = READ_ONCE(tick_do_timer_cpu);
+			if (timer_cpu == TICK_DO_TIMER_NONE ||
+			    !cpumask_test_cpu(timer_cpu, upd->new_mask)) {
+				int next_timer = cpumask_first(upd->new_mask);
+				if (next_timer < nr_cpu_ids)
+					WRITE_ONCE(tick_do_timer_cpu, next_timer);
+			}
+		}
+
+		/* Kick all CPUs again to apply new nohz full state */
+		for_each_online_cpu(cpu)
+			tick_nohz_full_kick_cpu(cpu);
+
+		free_cpumask_var(non_housekeeping_mask);
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block tick_nohz_housekeeping_nb = {
+	.notifier_call = tick_nohz_housekeeping_reconfigure,
+};
+
 void __init tick_nohz_init(void)
 {
 	int cpu, ret;
 
-	if (!tick_nohz_full_running)
-		return;
-
-	/*
-	 * Full dynticks uses IRQ work to drive the tick rescheduling on safe
-	 * locking contexts. But then we need IRQ work to raise its own
-	 * interrupts to avoid circular dependency on the tick.
-	 */
-	if (!arch_irq_work_has_interrupt()) {
-		pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support IRQ work self-IPIs\n");
-		cpumask_clear(tick_nohz_full_mask);
-		tick_nohz_full_running = false;
-		return;
+	if (!tick_nohz_full_mask) {
+		if (!slab_is_available())
+			alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
+		else
+			zalloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL);
 	}
 
-	if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) &&
-			!IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) {
-		cpu = smp_processor_id();
+	housekeeping_register_notifier(&tick_nohz_housekeeping_nb);
 
-		if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
-			pr_warn("NO_HZ: Clearing %d from nohz_full range "
-				"for timekeeping\n", cpu);
-			cpumask_clear_cpu(cpu, tick_nohz_full_mask);
+	if (tick_nohz_full_running) {
+		/*
+		 * Full dynticks uses IRQ work to drive the tick rescheduling on safe
+		 * locking contexts. But then we need IRQ work to raise its own
+		 * interrupts to avoid circular dependency on the tick.
+		 */
+		if (!arch_irq_work_has_interrupt()) {
+			pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support IRQ work self-IPIs\n");
+			cpumask_clear(tick_nohz_full_mask);
+			tick_nohz_full_running = false;
+			goto out;
 		}
+
+		if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) &&
+				!IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) {
+			cpu = smp_processor_id();
+
+			if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
+				pr_warn("NO_HZ: Clearing %d from nohz_full range "
+					"for timekeeping\n", cpu);
+				cpumask_clear_cpu(cpu, tick_nohz_full_mask);
+			}
+		}
+
+		pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
+			cpumask_pr_args(tick_nohz_full_mask));
 	}
 
-	for_each_cpu(cpu, tick_nohz_full_mask)
+out:
+	for_each_possible_cpu(cpu)
 		ct_cpu_track_user(cpu);
 
 	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
 					"kernel/nohz:predown", NULL,
 					tick_nohz_cpu_down);
 	WARN_ON(ret < 0);
-	pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
-		cpumask_pr_args(tick_nohz_full_mask));
 }
 #endif /* #ifdef CONFIG_NO_HZ_FULL */
 
@@ -1209,7 +1289,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 	if (unlikely(report_idle_softirq()))
 		return false;
 
-	if (tick_nohz_full_enabled()) {
+	if (tick_nohz_full_running) {
 		int tick_cpu = READ_ONCE(tick_do_timer_cpu);
 
 		/*

-- 
2.43.0


^ permalink raw reply related

* [PATCH v2 05/12] genirq: Support dynamic migration for managed interrupts
From: Qiliang Yuan @ 2026-04-13  7:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, Paul E. McKenney, Frederic Weisbecker,
	Neeraj Upadhyay, Joel Fernandes, Josh Triplett, Boqun Feng,
	Uladzislau Rezki, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Anna-Maria Behnsen, Ingo Molnar, Thomas Gleixner, Tejun Heo,
	Andrew Morton, Vlastimil Babka, Suren Baghdasaryan, Michal Hocko,
	Brendan Jackman, Johannes Weiner, Zi Yan, Waiman Long,
	Chen Ridong, Michal Koutný, Jonathan Corbet, Shuah Khan,
	Shuah Khan
  Cc: linux-kernel, rcu, linux-mm, cgroups, linux-doc, linux-kselftest,
	Qiliang Yuan
In-Reply-To: <20260413-wujing-dhm-v2-0-06df21caba5d@gmail.com>

Managed interrupts currently have their affinity determined once,
honoring boot-time isolation settings. There is no mechanism to migrate
them when housekeeping boundaries change at runtime.

Enable managed interrupts to respond dynamically to housekeeping updates.

This ensures that managed interrupts are migrated away from newly
isolated CPUs or redistributed when housekeeping CPUs are added.

Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
---
 kernel/irq/manage.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 2e80724378267..31e263d9f40d0 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -2801,3 +2801,52 @@ bool irq_check_status_bit(unsigned int irq, unsigned int bitmask)
 	return res;
 }
 EXPORT_SYMBOL_GPL(irq_check_status_bit);
+
+#ifdef CONFIG_SMP
+static int irq_housekeeping_reconfigure(struct notifier_block *nb,
+				       unsigned long action, void *data)
+{
+	struct housekeeping_update *upd = data;
+	unsigned int irq;
+
+	if (action != HK_UPDATE_MASK || upd->type != HK_TYPE_MANAGED_IRQ)
+		return NOTIFY_OK;
+
+	irq_lock_sparse();
+	for_each_active_irq(irq) {
+		struct irq_data *irqd;
+		struct irq_desc *desc;
+
+		desc = irq_to_desc(irq);
+		if (!desc)
+			continue;
+
+		scoped_guard(raw_spinlock_irqsave, &desc->lock) {
+			irqd = irq_desc_get_irq_data(desc);
+			if (!irqd_affinity_is_managed(irqd) || !desc->action ||
+			    !irq_data_get_irq_chip(irqd))
+				continue;
+
+			/*
+			 * Re-apply existing affinity to honor the new
+			 * housekeeping mask via __irq_set_affinity() logic.
+			 */
+			irq_set_affinity_locked(irqd, irq_data_get_affinity_mask(irqd), false);
+		}
+	}
+	irq_unlock_sparse();
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block irq_housekeeping_nb = {
+	.notifier_call = irq_housekeeping_reconfigure,
+};
+
+static int __init irq_init_housekeeping_notifier(void)
+{
+	housekeeping_register_notifier(&irq_housekeeping_nb);
+	return 0;
+}
+core_initcall(irq_init_housekeeping_notifier);
+#endif

-- 
2.43.0


^ permalink raw reply related

* [PATCH 1/2] Documentation/process: maintainer-soc: Trim from trivial ask-DT
From: Krzysztof Kozlowski @ 2026-04-13  7:44 UTC (permalink / raw)
  To: Arnd Bergmann, Krzysztof Kozlowski, Alexandre Belloni,
	Linus Walleij, Drew Fustini, Jonathan Corbet, Shuah Khan,
	linux-arm-kernel, soc, workflows, linux-doc, linux-kernel
  Cc: Krzysztof Kozlowski

It is obvious that one can ask DT maintainers about something, just like
one can ask anyone, so just drop the sentence.  Concise documents with
rules have a better chance of actually being read by people.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
---
 Documentation/process/maintainer-soc.rst | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Documentation/process/maintainer-soc.rst b/Documentation/process/maintainer-soc.rst
index 7d6bad989ad8..4029dc6938d8 100644
--- a/Documentation/process/maintainer-soc.rst
+++ b/Documentation/process/maintainer-soc.rst
@@ -169,8 +169,6 @@ more information on the validation of devicetrees.
 For new platforms, or additions to existing ones, ``make dtbs_check`` should not
 add any new warnings.  For RISC-V and Samsung SoC, ``make dtbs_check W=1`` is
 required to not add any new warnings.
-If in any doubt about a devicetree change, reach out to the devicetree
-maintainers.
 
 Branches and Pull Requests
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
-- 
2.51.0


^ permalink raw reply related

* [PATCH 2/2] Documentation/process: maintainer-soc: Document purpose of defconfigs
From: Krzysztof Kozlowski @ 2026-04-13  7:44 UTC (permalink / raw)
  To: Arnd Bergmann, Krzysztof Kozlowski, Alexandre Belloni,
	Linus Walleij, Drew Fustini, Jonathan Corbet, Shuah Khan,
	linux-arm-kernel, soc, workflows, linux-doc, linux-kernel
  Cc: Krzysztof Kozlowski
In-Reply-To: <20260413074401.27282-3-krzysztof.kozlowski@oss.qualcomm.com>

A common mistake in commit messages of patches on the mailing list adding
CONFIG options to arm/multi_v7 or arm64/defconfig is saying what the
patch is doing, e.g. "Enable driver foo".  That is obvious from the diff
part, thus explaining it does not bring any value.  What brings value is
to understand why "driver foo" should be in a shared, upstream
defconfig, especially considering that distros have their own defconfigs
and we do not care about non-upstream trees.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
---
 Documentation/process/maintainer-soc.rst | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/process/maintainer-soc.rst b/Documentation/process/maintainer-soc.rst
index 4029dc6938d8..a3a90a7d4c68 100644
--- a/Documentation/process/maintainer-soc.rst
+++ b/Documentation/process/maintainer-soc.rst
@@ -207,3 +207,13 @@ The subject line of a pull request should begin with "[GIT PULL]" and made using
 a signed tag, rather than a branch.  This tag should contain a short description
 summarising the changes in the pull request.  For more detail on sending pull
 requests, please see Documentation/maintainer/pull-requests.rst.
+
+Defconfigs purpose
+~~~~~~~~~~~~~~~~~~
+
+Defconfigs are primarily used by the kernel developers, because distros have
+their own configs.  A change adding new CONFIG options to a defconfig should
+explain why the kernel developers in general would want such option, e.g. by
+providing a name of an upstream-supported machine/board using that new option.
+This implies that enabling options in defconfig for non-upstream machines shall
+not be accepted.
-- 
2.51.0


^ permalink raw reply related

* [PATCH v2 06/12] watchdog: Allow runtime toggle of lockup detector affinity
From: Qiliang Yuan @ 2026-04-13  7:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, Paul E. McKenney, Frederic Weisbecker,
	Neeraj Upadhyay, Joel Fernandes, Josh Triplett, Boqun Feng,
	Uladzislau Rezki, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Anna-Maria Behnsen, Ingo Molnar, Thomas Gleixner, Tejun Heo,
	Andrew Morton, Vlastimil Babka, Suren Baghdasaryan, Michal Hocko,
	Brendan Jackman, Johannes Weiner, Zi Yan, Waiman Long,
	Chen Ridong, Michal Koutný, Jonathan Corbet, Shuah Khan,
	Shuah Khan
  Cc: linux-kernel, rcu, linux-mm, cgroups, linux-doc, linux-kselftest,
	Qiliang Yuan
In-Reply-To: <20260413-wujing-dhm-v2-0-06df21caba5d@gmail.com>

The hardlockup detector threads are affined to CPUs based on the
HK_TYPE_TIMER housekeeping mask at boot. If this mask is updated at
runtime, these threads remain on their original CPUs, potentially
running on isolated cores.

Synchronize watchdog thread affinity with HK_TYPE_TIMER updates.

This ensures that hardlockup detector threads correctly follow the
dynamic housekeeping boundaries for timers.

Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
---
 kernel/watchdog.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 7d675781bc917..bcd8373038126 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -26,6 +26,7 @@
 #include <linux/sysctl.h>
 #include <linux/tick.h>
 #include <linux/sys_info.h>
+#include <linux/sched/isolation.h>
 
 #include <linux/sched/clock.h>
 #include <linux/sched/debug.h>
@@ -1361,6 +1362,30 @@ static int __init lockup_detector_check(void)
 }
 late_initcall_sync(lockup_detector_check);
 
+static int watchdog_housekeeping_reconfigure(struct notifier_block *nb,
+					    unsigned long action, void *data)
+{
+	if (action == HK_UPDATE_MASK) {
+		struct housekeeping_update *upd = data;
+		unsigned int type = upd->type;
+
+		if (type == HK_TYPE_TIMER) {
+			mutex_lock(&watchdog_mutex);
+			cpumask_copy(&watchdog_cpumask,
+				     housekeeping_cpumask(HK_TYPE_TIMER));
+			cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
+			__lockup_detector_reconfigure(false);
+			mutex_unlock(&watchdog_mutex);
+		}
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block watchdog_housekeeping_nb = {
+	.notifier_call = watchdog_housekeeping_reconfigure,
+};
+
 void __init lockup_detector_init(void)
 {
 	if (tick_nohz_full_enabled())
@@ -1375,4 +1400,5 @@ void __init lockup_detector_init(void)
 		allow_lockup_detector_init_retry = true;
 
 	lockup_detector_setup();
+	housekeeping_register_notifier(&watchdog_housekeeping_nb);
 }

-- 
2.43.0


^ permalink raw reply related

* [PATCH v2 07/12] sched/core: Dynamically update scheduler domain housekeeping mask
From: Qiliang Yuan @ 2026-04-13  7:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, Paul E. McKenney, Frederic Weisbecker,
	Neeraj Upadhyay, Joel Fernandes, Josh Triplett, Boqun Feng,
	Uladzislau Rezki, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Anna-Maria Behnsen, Ingo Molnar, Thomas Gleixner, Tejun Heo,
	Andrew Morton, Vlastimil Babka, Suren Baghdasaryan, Michal Hocko,
	Brendan Jackman, Johannes Weiner, Zi Yan, Waiman Long,
	Chen Ridong, Michal Koutný, Jonathan Corbet, Shuah Khan,
	Shuah Khan
  Cc: linux-kernel, rcu, linux-mm, cgroups, linux-doc, linux-kselftest,
	Qiliang Yuan
In-Reply-To: <20260413-wujing-dhm-v2-0-06df21caba5d@gmail.com>

Scheduler domains rely on HK_TYPE_DOMAIN to identify which CPUs are
isolated from general load balancing. Currently, these boundaries are
static and determined only during boot-time domain initialization.

Trigger a scheduler domain rebuild when the HK_TYPE_DOMAIN mask changes.

This ensures that scheduler isolation boundaries can be reconfigured
at runtime via the DHEI sysfs or cpuset interface.

Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
---
 kernel/sched/core.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 496dff740dcaf..b71c433bbc420 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -39,6 +39,7 @@
 #include <linux/sched/nohz.h>
 #include <linux/sched/rseq_api.h>
 #include <linux/sched/rt.h>
+#include <linux/sched/topology.h>
 
 #include <linux/blkdev.h>
 #include <linux/context_tracking.h>
@@ -10959,3 +10960,25 @@ void sched_change_end(struct sched_change_ctx *ctx)
 		p->sched_class->prio_changed(rq, p, ctx->prio);
 	}
 }
+
+static int sched_housekeeping_update(struct notifier_block *nb,
+				     unsigned long action, void *data)
+{
+	struct housekeeping_update *update = data;
+
+	if (action == HK_UPDATE_MASK && update->type == HK_TYPE_DOMAIN)
+		rebuild_sched_domains();
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block sched_housekeeping_nb = {
+	.notifier_call = sched_housekeeping_update,
+};
+
+static int __init sched_housekeeping_init(void)
+{
+	housekeeping_register_notifier(&sched_housekeeping_nb);
+	return 0;
+}
+late_initcall(sched_housekeeping_init);

-- 
2.43.0


^ permalink raw reply related

* [PATCH v2 08/12] workqueue, mm: Support dynamic housekeeping mask updates
From: Qiliang Yuan @ 2026-04-13  7:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, Paul E. McKenney, Frederic Weisbecker,
	Neeraj Upadhyay, Joel Fernandes, Josh Triplett, Boqun Feng,
	Uladzislau Rezki, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Anna-Maria Behnsen, Ingo Molnar, Thomas Gleixner, Tejun Heo,
	Andrew Morton, Vlastimil Babka, Suren Baghdasaryan, Michal Hocko,
	Brendan Jackman, Johannes Weiner, Zi Yan, Waiman Long,
	Chen Ridong, Michal Koutný, Jonathan Corbet, Shuah Khan,
	Shuah Khan
  Cc: linux-kernel, rcu, linux-mm, cgroups, linux-doc, linux-kselftest,
	Qiliang Yuan
In-Reply-To: <20260413-wujing-dhm-v2-0-06df21caba5d@gmail.com>

Unbound workqueues and kcompactd threads determine their default CPU
affinity from housekeeping masks (HK_TYPE_WQ, HK_TYPE_DOMAIN, and
HK_TYPE_KTHREAD) at boot. Currently, these boundaries are static and
are not updated if housekeeping is reconfigured at runtime.

Implement housekeeping notifiers for both workqueue and mm compaction.

This ensures that unbound workqueue tasks and background compaction
threads honor dynamic isolation boundaries configured via sysfs or
cpuset at runtime.

Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
---
 kernel/workqueue.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 mm/compaction.c    | 27 +++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index eda756556341a..354e788004b48 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -8008,6 +8008,47 @@ static void __init wq_cpu_intensive_thresh_init(void)
 	wq_cpu_intensive_thresh_us = thresh;
 }
 
+static int wq_housekeeping_reconfigure(struct notifier_block *nb,
+				     unsigned long action, void *data)
+{
+	if (action == HK_UPDATE_MASK) {
+		struct housekeeping_update *upd = data;
+		unsigned int type = upd->type;
+
+		if (type == HK_TYPE_WQ || type == HK_TYPE_DOMAIN) {
+			cpumask_var_t cpumask;
+
+			if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) {
+				pr_warn("workqueue: failed to allocate cpumask for housekeeping update\n");
+				return NOTIFY_BAD;
+			}
+
+			cpumask_copy(cpumask, cpu_possible_mask);
+			if (!cpumask_empty(housekeeping_cpumask(HK_TYPE_WQ)))
+				cpumask_and(cpumask, cpumask, housekeeping_cpumask(HK_TYPE_WQ));
+			if (!cpumask_empty(housekeeping_cpumask(HK_TYPE_DOMAIN)))
+				cpumask_and(cpumask, cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN));
+
+			workqueue_set_unbound_cpumask(cpumask);
+
+			if (type == HK_TYPE_DOMAIN) {
+				apply_wqattrs_lock();
+				cpumask_andnot(wq_isolated_cpumask, cpu_possible_mask,
+						housekeeping_cpumask(HK_TYPE_DOMAIN));
+				apply_wqattrs_unlock();
+			}
+
+			free_cpumask_var(cpumask);
+		}
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block wq_housekeeping_nb = {
+	.notifier_call = wq_housekeeping_reconfigure,
+};
+
 /**
  * workqueue_init - bring workqueue subsystem fully online
  *
@@ -8068,6 +8109,7 @@ void __init workqueue_init(void)
 
 	wq_online = true;
 	wq_watchdog_init();
+	housekeeping_register_notifier(&wq_housekeeping_nb);
 }
 
 /*
diff --git a/mm/compaction.c b/mm/compaction.c
index 1e8f8eca318c6..574ee3c6dc942 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -24,6 +24,7 @@
 #include <linux/page_owner.h>
 #include <linux/psi.h>
 #include <linux/cpuset.h>
+#include <linux/sched/isolation.h>
 #include "internal.h"
 
 #ifdef CONFIG_COMPACTION
@@ -3246,6 +3247,7 @@ void __meminit kcompactd_run(int nid)
 		pr_err("Failed to start kcompactd on node %d\n", nid);
 		pgdat->kcompactd = NULL;
 	} else {
+		housekeeping_affine(pgdat->kcompactd, HK_TYPE_KTHREAD);
 		wake_up_process(pgdat->kcompactd);
 	}
 }
@@ -3320,6 +3322,30 @@ static const struct ctl_table vm_compaction[] = {
 	},
 };
 
+static int kcompactd_housekeeping_reconfigure(struct notifier_block *nb,
+					      unsigned long action, void *data)
+{
+	struct housekeeping_update *upd = data;
+	unsigned int type = upd->type;
+
+	if (action == HK_UPDATE_MASK && type == HK_TYPE_KTHREAD) {
+		int nid;
+
+		for_each_node_state(nid, N_MEMORY) {
+			pg_data_t *pgdat = NODE_DATA(nid);
+
+			if (pgdat->kcompactd)
+				housekeeping_affine(pgdat->kcompactd, HK_TYPE_KTHREAD);
+		}
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block kcompactd_housekeeping_nb = {
+	.notifier_call = kcompactd_housekeeping_reconfigure,
+};
+
 static int __init kcompactd_init(void)
 {
 	int nid;
@@ -3327,6 +3353,7 @@ static int __init kcompactd_init(void)
 	for_each_node_state(nid, N_MEMORY)
 		kcompactd_run(nid);
 	register_sysctl_init("vm", vm_compaction);
+	housekeeping_register_notifier(&kcompactd_housekeeping_nb);
 	return 0;
 }
 subsys_initcall(kcompactd_init)

-- 
2.43.0


^ permalink raw reply related

* [PATCH v2 09/12] cgroup/cpuset: Introduce CPUSet-driven dynamic housekeeping (DHM)
From: Qiliang Yuan @ 2026-04-13  7:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, Paul E. McKenney, Frederic Weisbecker,
	Neeraj Upadhyay, Joel Fernandes, Josh Triplett, Boqun Feng,
	Uladzislau Rezki, Mathieu Desnoyers, Lai Jiangshan, Zqiang,
	Anna-Maria Behnsen, Ingo Molnar, Thomas Gleixner, Tejun Heo,
	Andrew Morton, Vlastimil Babka, Suren Baghdasaryan, Michal Hocko,
	Brendan Jackman, Johannes Weiner, Zi Yan, Waiman Long,
	Chen Ridong, Michal Koutný, Jonathan Corbet, Shuah Khan,
	Shuah Khan
  Cc: linux-kernel, rcu, linux-mm, cgroups, linux-doc, linux-kselftest,
	Qiliang Yuan
In-Reply-To: <20260413-wujing-dhm-v2-0-06df21caba5d@gmail.com>

Currently, subsystem housekeeping masks are generally static and can
only be configured via boot-time parameters (e.g., isolcpus, nohz_full).
This inflexible approach forces a system reboot whenever an orchestrator
needs to change workload isolation boundaries.

This patch introduces CPUSet-driven Dynamic Housekeeping Management (DHM)
by exposing the `cpuset.housekeeping.cpus` control file on the root cgroup.
Writing a new cpumask to this file dynamically updates the housekeeping
masks of all registered subsystems (scheduler, RCU, timers, tick, workqueues,
and managed IRQs) simultaneously, without restarting the node.

At the cpuset and isolation core level, this change implements:
1. `housekeeping_update_all_types(const struct cpumask *new_mask)` API inside
   `isolation.c` to safely allocate, update, and replace all enabled hk_type masks.
2. The `cpuset.housekeeping.cpus` attribute in `dfl_files` for the root cpuset.
3. Hooking the write operation to iterate over enabled housekeeping types
   and invoke `housekeeping_update_notify()` (the DHM notifier chain) to
   push these configuration changes live into individual kernel subsystems.

Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
---
 include/linux/sched/isolation.h | 12 ++++++++++++
 kernel/cgroup/cpuset-internal.h |  1 +
 kernel/cgroup/cpuset.c          | 36 ++++++++++++++++++++++++++++++++++++
 kernel/sched/isolation.c        | 38 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 87 insertions(+)

diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index aea1dbc4d7486..299167f627895 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -48,6 +48,8 @@ extern void __init housekeeping_init(void);
 
 extern int housekeeping_register_notifier(struct notifier_block *nb);
 extern int housekeeping_unregister_notifier(struct notifier_block *nb);
+extern int housekeeping_update_notify(enum hk_type type, const struct cpumask *new_mask);
+extern int housekeeping_update_all_types(const struct cpumask *new_mask);
 
 #else
 
@@ -86,6 +88,16 @@ static inline int housekeeping_unregister_notifier(struct notifier_block *nb)
 {
 	return 0;
 }
+
+static inline int housekeeping_update_notify(enum hk_type type, const struct cpumask *new_mask)
+{
+	return 0;
+}
+
+static inline int housekeeping_update_all_types(const struct cpumask *new_mask)
+{
+	return 0;
+}
 #endif /* CONFIG_CPU_ISOLATION */
 
 static inline bool housekeeping_cpu(int cpu, enum hk_type type)
diff --git a/kernel/cgroup/cpuset-internal.h b/kernel/cgroup/cpuset-internal.h
index fd7d19842ded7..3ab437f54ecdf 100644
--- a/kernel/cgroup/cpuset-internal.h
+++ b/kernel/cgroup/cpuset-internal.h
@@ -60,6 +60,7 @@ typedef enum {
 	FILE_EXCLUSIVE_CPULIST,
 	FILE_EFFECTIVE_XCPULIST,
 	FILE_ISOLATED_CPULIST,
+	FILE_HOUSEKEEPING_CPULIST,
 	FILE_CPU_EXCLUSIVE,
 	FILE_MEM_EXCLUSIVE,
 	FILE_MEM_HARDWALL,
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 1335e437098e8..5df19dc9bfa89 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -3201,6 +3201,30 @@ static void cpuset_attach(struct cgroup_taskset *tset)
 	mutex_unlock(&cpuset_mutex);
 }
 
+/*
+ * DHM interface: root cpuset allows updating global housekeeping cpumask.
+ */
+static ssize_t cpuset_write_housekeeping_cpus(struct kernfs_open_file *of,
+					      char *buf, size_t nbytes, loff_t off)
+{
+	cpumask_var_t new_mask;
+	int retval;
+
+	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	buf = strstrip(buf);
+	retval = cpulist_parse(buf, new_mask);
+	if (retval)
+		goto out_free;
+
+	retval = housekeeping_update_all_types(new_mask);
+
+out_free:
+	free_cpumask_var(new_mask);
+	return retval ?: nbytes;
+}
+
 /*
  * Common handling for a write to a "cpus" or "mems" file.
  */
@@ -3290,6 +3314,9 @@ int cpuset_common_seq_show(struct seq_file *sf, void *v)
 	case FILE_ISOLATED_CPULIST:
 		seq_printf(sf, "%*pbl\n", cpumask_pr_args(isolated_cpus));
 		break;
+	case FILE_HOUSEKEEPING_CPULIST:
+		seq_printf(sf, "%*pbl\n", cpumask_pr_args(housekeeping_cpumask(HK_TYPE_DOMAIN)));
+		break;
 	default:
 		ret = -EINVAL;
 	}
@@ -3428,6 +3455,15 @@ static struct cftype dfl_files[] = {
 		.flags = CFTYPE_ONLY_ON_ROOT,
 	},
 
+	{
+		.name = "housekeeping.cpus",
+		.seq_show = cpuset_common_seq_show,
+		.write = cpuset_write_housekeeping_cpus,
+		.max_write_len = (100U + 6 * NR_CPUS),
+		.private = FILE_HOUSEKEEPING_CPULIST,
+		.flags = CFTYPE_ONLY_ON_ROOT,
+	},
+
 	{ }	/* terminate */
 };
 
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 0462b41807161..a92b0bb41de3a 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -27,6 +27,7 @@ enum hk_flags {
 #define HK_FLAG_KERNEL_NOISE (HK_FLAG_TICK | HK_FLAG_TIMER | HK_FLAG_RCU | \
 			      HK_FLAG_MISC | HK_FLAG_WQ | HK_FLAG_KTHREAD)
 
+static DEFINE_MUTEX(housekeeping_mutex);
 static BLOCKING_NOTIFIER_HEAD(housekeeping_notifier_list);
 
 DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
@@ -196,6 +197,43 @@ int housekeeping_update_notify(enum hk_type type, const struct cpumask *new_mask
 }
 EXPORT_SYMBOL_GPL(housekeeping_update_notify);
 
+int housekeeping_update_all_types(const struct cpumask *new_mask)
+{
+	enum hk_type type;
+	struct cpumask *old_masks[HK_TYPE_MAX] = { NULL };
+
+	if (cpumask_empty(new_mask) || !cpumask_intersects(new_mask, cpu_online_mask))
+		return -EINVAL;
+
+	if (!housekeeping.flags)
+		static_branch_enable(&housekeeping_overridden);
+
+	mutex_lock(&housekeeping_mutex);
+	for_each_set_bit(type, &housekeeping.flags, HK_TYPE_MAX) {
+		struct cpumask *nmask = kmalloc(cpumask_size(), GFP_KERNEL);
+
+		if (!nmask) {
+			mutex_unlock(&housekeeping_mutex);
+			return -ENOMEM;
+		}
+
+		cpumask_copy(nmask, new_mask);
+		old_masks[type] = housekeeping_cpumask_dereference(type);
+		rcu_assign_pointer(housekeeping.cpumasks[type], nmask);
+	}
+	mutex_unlock(&housekeeping_mutex);
+
+	synchronize_rcu();
+
+	for_each_set_bit(type, &housekeeping.flags, HK_TYPE_MAX) {
+		housekeeping_update_notify(type, new_mask);
+		kfree(old_masks[type]);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(housekeeping_update_all_types);
+
 void __init housekeeping_init(void)
 {
 	enum hk_type type;

-- 
2.43.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox