Linux Trace Kernel

Linux Trace Kernel
 help / color / mirror / Atom feed

* [PATCH v3 10/11] HID: spi-hid: add power management implementation
From: Jingyuan Liang @ 2026-04-02  1:59 UTC (permalink / raw)
  To: Jiri Kosina, Benjamin Tissoires, Jonathan Corbet, Mark Brown,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Dmitry Torokhov, Rob Herring, Krzysztof Kozlowski, Conor Dooley
  Cc: linux-input, linux-doc, linux-kernel, linux-spi,
	linux-trace-kernel, devicetree, hbarnor, tfiga, Jingyuan Liang
In-Reply-To: <20260402-send-upstream-v3-0-6091c458d357@chromium.org>

Implement HID over SPI driver power management callbacks.

Signed-off-by: Jingyuan Liang <jingyliang@chromium.org>
---
 drivers/hid/spi-hid/spi-hid-acpi.c |   1 +
 drivers/hid/spi-hid/spi-hid-core.c | 107 +++++++++++++++++++++++++++++++++++++
 drivers/hid/spi-hid/spi-hid-of.c   |   1 +
 drivers/hid/spi-hid/spi-hid.h      |   1 +
 4 files changed, 110 insertions(+)

diff --git a/drivers/hid/spi-hid/spi-hid-acpi.c b/drivers/hid/spi-hid/spi-hid-acpi.c
index 298e3ba44d8a..15cfc4e6cc2f 100644
--- a/drivers/hid/spi-hid/spi-hid-acpi.c
+++ b/drivers/hid/spi-hid/spi-hid-acpi.c
@@ -238,6 +238,7 @@ static struct spi_driver spi_hid_acpi_driver = {
 	.driver = {
 		.name	= "spi_hid_acpi",
 		.owner	= THIS_MODULE,
+		.pm	= &spi_hid_core_pm,
 		.acpi_match_table = spi_hid_acpi_match,
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 		.dev_groups = spi_hid_groups,
diff --git a/drivers/hid/spi-hid/spi-hid-core.c b/drivers/hid/spi-hid/spi-hid-core.c
index d48175c764b9..5f7a5bb692d9 100644
--- a/drivers/hid/spi-hid/spi-hid-core.c
+++ b/drivers/hid/spi-hid/spi-hid-core.c
@@ -35,6 +35,8 @@
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/pm.h>
+#include <linux/pm_wakeirq.h>
 #include <linux/slab.h>
 #include <linux/spi/spi.h>
 #include <linux/string.h>
@@ -244,6 +246,81 @@ static const char *spi_hid_power_mode_string(enum hidspi_power_state power_state
 	}
 }
 
+static void spi_hid_suspend(struct spi_hid *shid)
+{
+	int error;
+	struct device *dev = &shid->spi->dev;
+
+	guard(mutex)(&shid->power_lock);
+	if (shid->power_state == HIDSPI_OFF)
+		return;
+
+	if (shid->hid) {
+		error = hid_driver_suspend(shid->hid, PMSG_SUSPEND);
+		if (error) {
+			dev_err(dev, "%s failed to suspend hid driver: %d",
+				__func__, error);
+			return;
+		}
+	}
+
+	disable_irq(shid->spi->irq);
+
+	clear_bit(SPI_HID_READY, &shid->flags);
+
+	if (!device_may_wakeup(dev)) {
+		set_bit(SPI_HID_RESET_PENDING, &shid->flags);
+
+		shid->ops->assert_reset(shid->ops);
+
+		error = shid->ops->power_down(shid->ops);
+		if (error) {
+			dev_err(dev, "%s: could not power down.", __func__);
+			shid->regulator_error_count++;
+			shid->regulator_last_error = error;
+			return;
+		}
+
+		shid->power_state = HIDSPI_OFF;
+	}
+}
+
+static void spi_hid_resume(struct spi_hid *shid)
+{
+	int error;
+	struct device *dev = &shid->spi->dev;
+
+	guard(mutex)(&shid->power_lock);
+	if (shid->power_state == HIDSPI_ON)
+		return;
+
+	enable_irq(shid->spi->irq);
+
+	if (!device_may_wakeup(dev)) {
+		shid->ops->assert_reset(shid->ops);
+
+		shid->ops->sleep_minimal_reset_delay(shid->ops);
+
+		error = shid->ops->power_up(shid->ops);
+		if (error) {
+			dev_err(dev, "%s: could not power up.", __func__);
+			shid->regulator_error_count++;
+			shid->regulator_last_error = error;
+			return;
+		}
+		shid->power_state = HIDSPI_ON;
+
+		shid->ops->deassert_reset(shid->ops);
+	}
+
+	if (shid->hid) {
+		error = hid_driver_reset_resume(shid->hid);
+		if (error)
+			dev_err(dev, "%s: failed to reset resume hid driver: %d.",
+				__func__, error);
+	}
+}
+
 static void spi_hid_stop_hid(struct spi_hid *shid)
 {
 	struct hid_device *hid = shid->hid;
@@ -1200,6 +1277,13 @@ int spi_hid_core_probe(struct spi_device *spi, struct spihid_ops *ops,
 		dev_err(dev, "%s: unable to request threaded IRQ.", __func__);
 		return error;
 	}
+	if (device_may_wakeup(dev)) {
+		error = dev_pm_set_wake_irq(dev, spi->irq);
+		if (error) {
+			dev_err(dev, "%s: failed to set wake IRQ.", __func__);
+			return error;
+		}
+	}
 
 	error = shid->ops->power_up(shid->ops);
 	if (error) {
@@ -1231,6 +1315,29 @@ void spi_hid_core_remove(struct spi_device *spi)
 }
 EXPORT_SYMBOL_GPL(spi_hid_core_remove);
 
+static int spi_hid_core_pm_suspend(struct device *dev)
+{
+	struct spi_hid *shid = dev_get_drvdata(dev);
+
+	spi_hid_suspend(shid);
+
+	return 0;
+}
+
+static int spi_hid_core_pm_resume(struct device *dev)
+{
+	struct spi_hid *shid = dev_get_drvdata(dev);
+
+	spi_hid_resume(shid);
+
+	return 0;
+}
+
+const struct dev_pm_ops spi_hid_core_pm = {
+	SYSTEM_SLEEP_PM_OPS(spi_hid_core_pm_suspend, spi_hid_core_pm_resume)
+};
+EXPORT_SYMBOL_GPL(spi_hid_core_pm);
+
 MODULE_DESCRIPTION("HID over SPI transport driver");
 MODULE_AUTHOR("Dmitry Antipov <dmanti@microsoft.com>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/hid/spi-hid/spi-hid-of.c b/drivers/hid/spi-hid/spi-hid-of.c
index 651456b6906d..80c481b77149 100644
--- a/drivers/hid/spi-hid/spi-hid-of.c
+++ b/drivers/hid/spi-hid/spi-hid-of.c
@@ -227,6 +227,7 @@ static struct spi_driver spi_hid_of_driver = {
 	.driver = {
 		.name	= "spi_hid_of",
 		.owner	= THIS_MODULE,
+		.pm	= &spi_hid_core_pm,
 		.of_match_table = spi_hid_of_match,
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 		.dev_groups = spi_hid_groups,
diff --git a/drivers/hid/spi-hid/spi-hid.h b/drivers/hid/spi-hid/spi-hid.h
index f5a5f4d54beb..17b2fdf192ed 100644
--- a/drivers/hid/spi-hid/spi-hid.h
+++ b/drivers/hid/spi-hid/spi-hid.h
@@ -41,5 +41,6 @@ int spi_hid_core_probe(struct spi_device *spi, struct spihid_ops *ops,
 void spi_hid_core_remove(struct spi_device *spi);
 
 extern const struct attribute_group *spi_hid_groups[];
+extern const struct dev_pm_ops spi_hid_core_pm;
 
 #endif /* SPI_HID_H */

-- 
2.53.0.1185.g05d4b7b318-goog


^ permalink raw reply related

* [PATCH v3 11/11] HID: spi-hid: add panel follower support
From: Jingyuan Liang @ 2026-04-02  1:59 UTC (permalink / raw)
  To: Jiri Kosina, Benjamin Tissoires, Jonathan Corbet, Mark Brown,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Dmitry Torokhov, Rob Herring, Krzysztof Kozlowski, Conor Dooley
  Cc: linux-input, linux-doc, linux-kernel, linux-spi,
	linux-trace-kernel, devicetree, hbarnor, tfiga, Jingyuan Liang
In-Reply-To: <20260402-send-upstream-v3-0-6091c458d357@chromium.org>

Add support to spi-hid to be a panel follower.

Signed-off-by: Jingyuan Liang <jingyliang@chromium.org>
---
 drivers/hid/spi-hid/spi-hid-core.c | 199 +++++++++++++++++++++++++++++--------
 drivers/hid/spi-hid/spi-hid-core.h |   7 ++
 2 files changed, 163 insertions(+), 43 deletions(-)

diff --git a/drivers/hid/spi-hid/spi-hid-core.c b/drivers/hid/spi-hid/spi-hid-core.c
index 5f7a5bb692d9..9eedd4f1cba7 100644
--- a/drivers/hid/spi-hid/spi-hid-core.c
+++ b/drivers/hid/spi-hid/spi-hid-core.c
@@ -246,21 +246,21 @@ static const char *spi_hid_power_mode_string(enum hidspi_power_state power_state
 	}
 }
 
-static void spi_hid_suspend(struct spi_hid *shid)
+static int spi_hid_suspend(struct spi_hid *shid)
 {
 	int error;
 	struct device *dev = &shid->spi->dev;
 
 	guard(mutex)(&shid->power_lock);
 	if (shid->power_state == HIDSPI_OFF)
-		return;
+		return 0;
 
 	if (shid->hid) {
 		error = hid_driver_suspend(shid->hid, PMSG_SUSPEND);
 		if (error) {
 			dev_err(dev, "%s failed to suspend hid driver: %d",
 				__func__, error);
-			return;
+			return error;
 		}
 	}
 
@@ -278,21 +278,22 @@ static void spi_hid_suspend(struct spi_hid *shid)
 			dev_err(dev, "%s: could not power down.", __func__);
 			shid->regulator_error_count++;
 			shid->regulator_last_error = error;
-			return;
+			return error;
 		}
 
 		shid->power_state = HIDSPI_OFF;
 	}
+	return 0;
 }
 
-static void spi_hid_resume(struct spi_hid *shid)
+static int spi_hid_resume(struct spi_hid *shid)
 {
 	int error;
 	struct device *dev = &shid->spi->dev;
 
 	guard(mutex)(&shid->power_lock);
 	if (shid->power_state == HIDSPI_ON)
-		return;
+		return 0;
 
 	enable_irq(shid->spi->irq);
 
@@ -306,7 +307,7 @@ static void spi_hid_resume(struct spi_hid *shid)
 			dev_err(dev, "%s: could not power up.", __func__);
 			shid->regulator_error_count++;
 			shid->regulator_last_error = error;
-			return;
+			return error;
 		}
 		shid->power_state = HIDSPI_ON;
 
@@ -315,10 +316,13 @@ static void spi_hid_resume(struct spi_hid *shid)
 
 	if (shid->hid) {
 		error = hid_driver_reset_resume(shid->hid);
-		if (error)
+		if (error) {
 			dev_err(dev, "%s: failed to reset resume hid driver: %d.",
 				__func__, error);
+			return error;
+		}
 	}
+	return 0;
 }
 
 static void spi_hid_stop_hid(struct spi_hid *shid)
@@ -1215,6 +1219,132 @@ const struct attribute_group *spi_hid_groups[] = {
 };
 EXPORT_SYMBOL_GPL(spi_hid_groups);
 
+/*
+ * At the end of probe we initialize the device:
+ *   0) assert reset, bias the interrupt line
+ *   1) sleep minimal reset delay
+ *   2) request IRQ
+ *   3) power up the device
+ *   4) deassert reset (high)
+ * After this we expect an IRQ with a reset response.
+ */
+static int spi_hid_dev_init(struct spi_hid *shid)
+{
+	struct spi_device *spi = shid->spi;
+	struct device *dev = &spi->dev;
+	int error;
+
+	shid->ops->assert_reset(shid->ops);
+
+	shid->ops->sleep_minimal_reset_delay(shid->ops);
+
+	error = devm_request_threaded_irq(dev, spi->irq, NULL, spi_hid_dev_irq,
+					  IRQF_ONESHOT, dev_name(&spi->dev), shid);
+	if (error) {
+		dev_err(dev, "%s: unable to request threaded IRQ.", __func__);
+		return error;
+	}
+	if (device_may_wakeup(dev)) {
+		error = dev_pm_set_wake_irq(dev, spi->irq);
+		if (error) {
+			dev_err(dev, "%s: failed to set wake IRQ.", __func__);
+			return error;
+		}
+	}
+
+	error = shid->ops->power_up(shid->ops);
+	if (error) {
+		dev_err(dev, "%s: could not power up.", __func__);
+		shid->regulator_error_count++;
+		shid->regulator_last_error = error;
+		return error;
+	}
+
+	shid->ops->deassert_reset(shid->ops);
+
+	return 0;
+}
+
+static void spi_hid_panel_follower_work(struct work_struct *work)
+{
+	struct spi_hid *shid = container_of(work, struct spi_hid,
+					    panel_follower_work);
+	int error;
+
+	if (!shid->desc.hid_version)
+		error = spi_hid_dev_init(shid);
+	else
+		error = spi_hid_resume(shid);
+	if (error)
+		dev_warn(&shid->spi->dev, "Power on failed: %d", error);
+	else
+		WRITE_ONCE(shid->panel_follower_work_finished, true);
+
+	/*
+	 * The work APIs provide a number of memory ordering guarantees
+	 * including one that says that memory writes before schedule_work()
+	 * are always visible to the work function, but they don't appear to
+	 * guarantee that a write that happened in the work is visible after
+	 * cancel_work_sync(). We'll add a write memory barrier here to match
+	 * with spi_hid_panel_follower_suspend() to ensure that our write to
+	 * panel_follower_work_finished is visible there.
+	 */
+	smp_wmb();
+}
+
+static int spi_hid_panel_follower_resume(struct drm_panel_follower *follower)
+{
+	struct spi_hid *shid = container_of(follower, struct spi_hid, panel_follower);
+
+	/*
+	 * Powering on a touchscreen can be a slow process. Queue the work to
+	 * the system workqueue so we don't block the panel's power up.
+	 */
+	WRITE_ONCE(shid->panel_follower_work_finished, false);
+	schedule_work(&shid->panel_follower_work);
+
+	return 0;
+}
+
+static int spi_hid_panel_follower_suspend(struct drm_panel_follower *follower)
+{
+	struct spi_hid *shid = container_of(follower, struct spi_hid, panel_follower);
+
+	cancel_work_sync(&shid->panel_follower_work);
+
+	/* Match with spi_hid_panel_follower_work() */
+	smp_rmb();
+	if (!READ_ONCE(shid->panel_follower_work_finished))
+		return 0;
+
+	return spi_hid_suspend(shid);
+}
+
+static const struct drm_panel_follower_funcs
+				spi_hid_panel_follower_prepare_funcs = {
+	.panel_prepared = spi_hid_panel_follower_resume,
+	.panel_unpreparing = spi_hid_panel_follower_suspend,
+};
+
+static int spi_hid_register_panel_follower(struct spi_hid *shid)
+{
+	struct device *dev = &shid->spi->dev;
+
+	shid->panel_follower.funcs = &spi_hid_panel_follower_prepare_funcs;
+
+	/*
+	 * If we're not in control of our own power up/power down then we can't
+	 * do the logic to manage wakeups. Give a warning if a user thought
+	 * that was possible then force the capability off.
+	 */
+	if (device_can_wakeup(dev)) {
+		dev_warn(dev, "Can't wakeup if following panel\n");
+		device_set_wakeup_capable(dev, false);
+	}
+
+	return drm_panel_add_follower(dev, &shid->panel_follower);
+}
+
 int spi_hid_core_probe(struct spi_device *spi, struct spihid_ops *ops,
 		       struct spi_hid_conf *conf)
 {
@@ -1234,6 +1364,7 @@ int spi_hid_core_probe(struct spi_device *spi, struct spihid_ops *ops,
 	shid->ops = ops;
 	shid->conf = conf;
 	set_bit(SPI_HID_RESET_PENDING, &shid->flags);
+	shid->is_panel_follower = drm_is_panel_follower(&spi->dev);
 
 	spi_set_drvdata(spi, shid);
 
@@ -1247,6 +1378,7 @@ int spi_hid_core_probe(struct spi_device *spi, struct spihid_ops *ops,
 	init_completion(&shid->output_done);
 
 	INIT_WORK(&shid->reset_work, spi_hid_reset_work);
+	INIT_WORK(&shid->panel_follower_work, spi_hid_panel_follower_work);
 
 	/*
 	 * We need to allocate the buffer without knowing the maximum
@@ -1257,42 +1389,18 @@ int spi_hid_core_probe(struct spi_device *spi, struct spihid_ops *ops,
 	if (error)
 		return error;
 
-	/*
-	 * At the end of probe we initialize the device:
-	 *   0) assert reset, bias the interrupt line
-	 *   1) sleep minimal reset delay
-	 *   2) request IRQ
-	 *   3) power up the device
-	 *   4) deassert reset (high)
-	 * After this we expect an IRQ with a reset response.
-	 */
-
-	shid->ops->assert_reset(shid->ops);
-
-	shid->ops->sleep_minimal_reset_delay(shid->ops);
-
-	error = devm_request_threaded_irq(dev, spi->irq, NULL, spi_hid_dev_irq,
-					  IRQF_ONESHOT, dev_name(&spi->dev), shid);
-	if (error) {
-		dev_err(dev, "%s: unable to request threaded IRQ.", __func__);
-		return error;
-	}
-	if (device_may_wakeup(dev)) {
-		error = dev_pm_set_wake_irq(dev, spi->irq);
+	if (shid->is_panel_follower) {
+		error = spi_hid_register_panel_follower(shid);
 		if (error) {
-			dev_err(dev, "%s: failed to set wake IRQ.", __func__);
+			dev_err(dev, "%s: could not add panel follower.", __func__);
 			return error;
 		}
+	} else {
+		error = spi_hid_dev_init(shid);
+		if (error)
+			return error;
 	}
 
-	error = shid->ops->power_up(shid->ops);
-	if (error) {
-		dev_err(dev, "%s: could not power up.", __func__);
-		return error;
-	}
-
-	shid->ops->deassert_reset(shid->ops);
-
 	dev_dbg(dev, "%s: d3 -> %s.", __func__,
 		spi_hid_power_mode_string(shid->power_state));
 
@@ -1306,6 +1414,9 @@ void spi_hid_core_remove(struct spi_device *spi)
 	struct device *dev = &spi->dev;
 	int error;
 
+	if (shid->is_panel_follower)
+		drm_panel_remove_follower(&shid->panel_follower);
+
 	spi_hid_stop_hid(shid);
 
 	shid->ops->assert_reset(shid->ops);
@@ -1319,18 +1430,20 @@ static int spi_hid_core_pm_suspend(struct device *dev)
 {
 	struct spi_hid *shid = dev_get_drvdata(dev);
 
-	spi_hid_suspend(shid);
+	if (shid->is_panel_follower)
+		return 0;
 
-	return 0;
+	return spi_hid_suspend(shid);
 }
 
 static int spi_hid_core_pm_resume(struct device *dev)
 {
 	struct spi_hid *shid = dev_get_drvdata(dev);
 
-	spi_hid_resume(shid);
+	if (shid->is_panel_follower)
+		return 0;
 
-	return 0;
+	return spi_hid_resume(shid);
 }
 
 const struct dev_pm_ops spi_hid_core_pm = {
diff --git a/drivers/hid/spi-hid/spi-hid-core.h b/drivers/hid/spi-hid/spi-hid-core.h
index 293e2cfcfbf7..261b2fd7f332 100644
--- a/drivers/hid/spi-hid/spi-hid-core.h
+++ b/drivers/hid/spi-hid/spi-hid-core.h
@@ -10,6 +10,8 @@
 #include <linux/hid-over-spi.h>
 #include <linux/spi/spi.h>
 
+#include <drm/drm_panel.h>
+
 /* Protocol message size constants */
 #define SPI_HID_READ_APPROVAL_LEN		5
 #define SPI_HID_OUTPUT_HEADER_LEN		8
@@ -56,6 +58,10 @@ struct spi_hid {
 	struct spi_hid_input_buf *input;	/* Input buffer. */
 	struct spi_hid_input_buf *response;	/* Response buffer. */
 
+	struct drm_panel_follower panel_follower;
+	bool	is_panel_follower;
+	bool	panel_follower_work_finished;
+
 	u16 response_length;
 	u16 bufsize;
 
@@ -66,6 +72,7 @@ struct spi_hid {
 	unsigned long flags;	/* device flags. */
 
 	struct work_struct reset_work;
+	struct work_struct panel_follower_work;
 
 	/* Control lock to ensure complete output transaction. */
 	struct mutex output_lock;

-- 
2.53.0.1185.g05d4b7b318-goog


^ permalink raw reply related

* Re: [PATCH] bootconfig: Skip printing early params to cmdline from bootconfig
From: Masami Hiramatsu @ 2026-04-02  3:52 UTC (permalink / raw)
  To: Breno Leitao; +Cc: Steven Rostedt, linux-kernel, linux-trace-kernel
In-Reply-To: <ac0-rrQk8BoS9B5k@gmail.com>

On Wed, 1 Apr 2026 08:51:48 -0700
Breno Leitao <leitao@debian.org> wrote:

> On Wed, Apr 01, 2026 at 11:02:55PM +0900, Masami Hiramatsu (Google) wrote:
> > From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > 
> > If user configures `kernel.key` in bootconfig, the 'key' is shown
> > in kernel cmdline (/proc/cmdline) and kernel boot parameter
> > handler associated with 'key' is invoked. However, since the
> > bootconfig does not support the parameter defined with early_param,
> > those keys are shown in '/proc/cmdline' but not handled by kernel.
> > 
> > This could easily mislead users who expected to be able to specify
> > early parameters via the boot configuration, leading them to wonder
> > why it doesn't work.
> > 
> > Let's skip printing out early params to cmdline buffer, and warn
> > if there is such parameters in bootconfig.
> > 
> > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> 
> Reviewed-by: Breno Leitao <leitao@debian.org>

Thanks, but sashiko found a problem.

https://sashiko.dev/#/patchset/177505217508.1807250.22866077077504564.stgit%40mhiramat.tok.corp.google.com

| Will this inadvertently filter out parameters that have both early and
| normal handlers?
| For example, "console" is defined as both an early parameter and a normal
| parameter. If a user configures kernel.console in bootconfig, this loop
| will find the early_param entry and return true.

Let me update it. I need to check (is_early_param() && !is_normal_param()).

Thanks!

> 
> > +static bool __init is_early_param(const char *param)
> > +{
> > +	const struct obs_kernel_param *p;
> > +
> > +	for (p = __setup_start; p < __setup_end; p++) {
> > +		if (p->early && parameq(param, p->str))
> > +			return true;
> > +	}
> 
> nit: I don't think you need the parenthesis ({) for the ifs in here.
> 


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply

* Re: [PATCH bpf v3 1/2] bpf: Reject sleepable kprobe_multi programs at attach time
From: Leon Hwang @ 2026-04-02  4:13 UTC (permalink / raw)
  To: Varun R Mallya, bpf
  Cc: ast, daniel, memxor, yonghong.song, jolsa, rostedt, mhiramat,
	linux-kernel, linux-trace-kernel
In-Reply-To: <20260401191126.440683-1-varunrmallya@gmail.com>

On 2/4/26 03:11, Varun R Mallya wrote:
> kprobe.multi programs run in atomic/RCU context and cannot sleep.
> However, bpf_kprobe_multi_link_attach() did not validate whether the
> program being attached had the sleepable flag set, allowing sleepable
> helpers such as bpf_copy_from_user() to be invoked from a non-sleepable
> context.
> 
> This causes a "sleeping function called from invalid context" splat:
> 
>   BUG: sleeping function called from invalid context at ./include/linux/uaccess.h:169
>   in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1787, name: sudo
>   preempt_count: 1, expected: 0
>   RCU nest depth: 2, expected: 0
> 
> Fix this by rejecting sleepable programs early in
> bpf_kprobe_multi_link_attach(), before any further processing.
> 
> Fixes: 0dcac2725406 ("bpf: Add multi kprobe link")
> Signed-off-by: Varun R Mallya <varunrmallya@gmail.com>

Acked-by: Leon Hwang <leon.hwang@linux.dev>

The cover letter is missing, and the change logs are missing, too.

[...]


^ permalink raw reply

* Re: [PATCH bpf v3 2/2] selftests/bpf: Add test to ensure kprobe_multi is not sleepable
From: Leon Hwang @ 2026-04-02  4:13 UTC (permalink / raw)
  To: Varun R Mallya, bpf
  Cc: ast, daniel, memxor, yonghong.song, jolsa, rostedt, mhiramat,
	linux-kernel, linux-trace-kernel
In-Reply-To: <20260401191126.440683-2-varunrmallya@gmail.com>

Above all, I think the test should reproduce the BUG without the fix.

I updated the test to reproduce the BUG, and then to verify that the
attachment is rejected with the fix.

The updated test is attached at the end.

On 2/4/26 03:11, Varun R Mallya wrote:
> Add a selftest to ensure that kprobe_multi programs cannot be attached
> using the BPF_F_SLEEPABLE flag. This test succeeds when the kernel
> rejects attachment of kprobe_multi when the BPF_F_SLEEPABLE flag is set.
> 
> Signed-off-by: Varun R Mallya <varunrmallya@gmail.com>
> ---
>  .../bpf/prog_tests/kprobe_multi_test.c        | 41 +++++++++++++++++++
>  .../bpf/progs/kprobe_multi_sleepable.c        | 13 ++++++
>  2 files changed, 54 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/progs/kprobe_multi_sleepable.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
> index 78c974d4ea33..f02fec2b6fda 100644
> --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
> +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
> @@ -10,6 +10,7 @@
>  #include "kprobe_multi_session_cookie.skel.h"
>  #include "kprobe_multi_verifier.skel.h"
>  #include "kprobe_write_ctx.skel.h"
> +#include "kprobe_multi_sleepable.skel.h"
>  #include "bpf/libbpf_internal.h"
>  #include "bpf/hashmap.h"
>  
> @@ -633,6 +634,44 @@ static void test_attach_write_ctx(void)
>  }
>  #endif
>  
> +static void test_attach_multi_sleepable(void)
> +{
> +	struct kprobe_multi_sleepable *skel;
> +	int err;
> +
> +	skel = kprobe_multi_sleepable__open();
> +	if (!ASSERT_OK_PTR(skel, "kprobe_multi_sleepable__open"))
> +		return;
> +
> +	err = bpf_program__set_flags(skel->progs.handle_kprobe_multi_sleepable,
> +				     BPF_F_SLEEPABLE);
> +	if (!ASSERT_OK(err, "bpf_program__set_flags"))
> +		goto cleanup;
> +
> +	/* Load should succeed even with BPF_F_SLEEPABLE for KPROBE types */
> +	err = kprobe_multi_sleepable__load(skel);
> +	if (!ASSERT_OK(err, "kprobe_multi_sleepable__load"))
> +		goto cleanup;
> +
> +	/* Attachment must fail for kprobe.multi + BPF_F_SLEEPABLE.
> +	 * Also chosen a stable symbol to send into opts
> +	 */
> +	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
> +	const char *sym = "vfs_read";

These should be declared together with skel and err. See below.

> +
> +	opts.syms = &sym;
> +	opts.cnt = 1;
> +
> +	skel->links.handle_kprobe_multi_sleepable =
> +		bpf_program__attach_kprobe_multi_opts(skel->progs.handle_kprobe_multi_sleepable,
> +						      NULL, &opts);
> +	ASSERT_ERR_PTR(skel->links.handle_kprobe_multi_sleepable,
> +		       "bpf_program__attach_kprobe_multi_opts");

As Kumar suggested, better to also verify the error here.

	ASSERT_EQ(libbpf_get_error(skel->links.handle_kprobe_multi_sleepable),
-EINVAL,
		  "bpf_program__attach_kprobe_multi_opts error");

> +
> +cleanup:
> +	kprobe_multi_sleepable__destroy(skel);
> +}
> +

[...]

Thanks,
Leon

---

diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 78c974d4ea33..d59cf840da83 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -10,6 +10,7 @@
 #include "kprobe_multi_session_cookie.skel.h"
 #include "kprobe_multi_verifier.skel.h"
 #include "kprobe_write_ctx.skel.h"
+#include "kprobe_multi_sleepable.skel.h"
 #include "bpf/libbpf_internal.h"
 #include "bpf/hashmap.h"

@@ -633,6 +634,52 @@ static void test_attach_write_ctx(void)
 }
 #endif

+static void test_attach_multi_sleepable(void)
+{
+       struct kprobe_multi_sleepable *skel;
+       const char *sym = "bpf_fentry_test1";
+       int err;
+       LIBBPF_OPTS(bpf_test_run_opts, topts);
+       LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
+                   .syms = &sym,
+                   .cnt = 1
+       );
+
+       skel = kprobe_multi_sleepable__open();
+       if (!ASSERT_OK_PTR(skel, "kprobe_multi_sleepable__open"))
+               return;
+
+       skel->bss->user_ptr = skel;
+
+       err =
bpf_program__set_flags(skel->progs.handle_kprobe_multi_sleepable,
+                                    BPF_F_SLEEPABLE);
+       if (!ASSERT_OK(err, "bpf_program__set_flags"))
+               goto cleanup;
+
+       /* Load should succeed even with BPF_F_SLEEPABLE for KPROBE types */
+       err = kprobe_multi_sleepable__load(skel);
+       if (!ASSERT_OK(err, "kprobe_multi_sleepable__load"))
+               goto cleanup;
+
+       /*
+        * Attachment must fail for kprobe.multi + BPF_F_SLEEPABLE.
+        * Also chosen a stable symbol to send into opts
+        */
+       skel->links.handle_kprobe_multi_sleepable =
+
bpf_program__attach_kprobe_multi_opts(skel->progs.handle_kprobe_multi_sleepable,
+                                                     NULL, &opts);
+       ASSERT_ERR_PTR(skel->links.handle_kprobe_multi_sleepable,
+                      "bpf_program__attach_kprobe_multi_opts");
+
ASSERT_EQ(libbpf_get_error(skel->links.handle_kprobe_multi_sleepable),
-EINVAL,
+                 "attach_multi_sleepable_err");
+
+       err =
bpf_prog_test_run_opts(bpf_program__fd(skel->progs.fentry), &topts);
+       ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+cleanup:
+       kprobe_multi_sleepable__destroy(skel);
+}
+
 void serial_test_kprobe_multi_bench_attach(void)
 {
        if (test__start_subtest("kernel"))
@@ -676,5 +723,7 @@ void test_kprobe_multi_test(void)
                test_unique_match();
        if (test__start_subtest("attach_write_ctx"))
                test_attach_write_ctx();
+       if (test__start_subtest("attach_multi_sleepable"))
+               test_attach_multi_sleepable();
        RUN_TESTS(kprobe_multi_verifier);
 }
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_sleepable.c
b/tools/testing/selftests/bpf/progs/kprobe_multi_sleepable.c
new file mode 100644
index 000000000000..932e1d9c72e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_sleepable.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+void *user_ptr = 0;
+
+SEC("kprobe.multi")
+int handle_kprobe_multi_sleepable(struct pt_regs *ctx)
+{
+       int a, err;
+
+       err = bpf_copy_from_user(&a, sizeof(a), user_ptr);
+       barrier_var(a);
+       return err;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(fentry)
+{
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";


^ permalink raw reply related

* Re: [PATCH bpf-next v3 0/6] bpf trampoline support "jmp" mode
From: Leon Hwang @ 2026-04-02  6:12 UTC (permalink / raw)
  To: menglong8.dong
  Cc: andrii, ast, bpf, daniel, eddyz87, haoluo, jiang.biao,
	john.fastabend, jolsa, kpsingh, linux-kernel, linux-trace-kernel,
	mark.rutland, martin.lau, mathieu.desnoyers, mhiramat, rostedt,
	sdf, song, yonghong.song, Leon Hwang
In-Reply-To: <20251118123639.688444-1-dongml2@chinatelecom.cn>

On Tue, Nov 18, 2025 at 08:36:28PM +0800, Menglong Dong wrote:
>For now, the bpf trampoline is called by the "call" instruction. However,
>it break the RSB and introduce extra overhead in x86_64 arch.
>
>For example, we hook the function "foo" with fexit, the call and return
>logic will be like this:
>  call foo -> call trampoline -> call foo-body ->
>  return foo-body -> return foo
>
>As we can see above, there are 3 call, but 2 return, which break the RSB
>balance. We can pseudo a "return" here, but it's not the best choice,
>as it will still cause once RSB miss:
>  call foo -> call trampoline -> call foo-body ->
>  return foo-body -> return dummy -> return foo
>
>The "return dummy" doesn't pair the "call trampoline", which can also
>cause the RSB miss.
>
>Therefore, we introduce the "jmp" mode for bpf trampoline, as advised by
>Alexei in [1]. And the logic will become this:
>  call foo -> jmp trampoline -> call foo-body ->
>  return foo-body -> return foo
>
>As we can see above, the RSB is totally balanced after this series.
>

Hi, this is a late footnote for this optimization.

Since this optimization landed in the 6.19 kernel, the function graph feature
of bpfsnoop [1] no longer works, because the tracee's FP/IP is missing for
fexit.

Before this optimization,

caller
-> call icmp_rcv                caller IP/FP
   -> call trampoline           icmp_rcv IP/FP
      -> call icmp_rcv body     trampoline IP/FP
      <- return to trampoline
   <- return to caller

After this optimization,

caller
-> call icmp_rcv                caller IP/FP
   -> jump to trampoline
      -> call icmp_rcv body     trampoline IP/FP
      <- return to trampoline
   <- return to caller

As a result, the function call stack entry for icmp_rcv is gone.

It can be confirmed by bpf_get_stack*() helpers.

$ sudo bpfsnoop -k icmp_rcv --output-stack -v

In 6.14,

0xffff8000802bda44:bpfsnoop_fn+0x6a4
0xffff8000802bda44:bpfsnoop_fn+0x6a4
0xffff8000802bd064:bpf_trampoline_6442573163+0xa4
0xffffc7825c984df0:icmp_rcv+0x8
0xffffc7825c91bcb8:ip_protocol_deliver_rcu+0x48
0xffffc7825c91bfd4:ip_local_deliver_finish+0x8c
0xffffc7825c91c0d0:ip_local_deliver+0x88

In 6.19,

0xffffffffc0209069:bpfsnoop_fn+0x449
0xffffffffc01ef2a4:bpf_trampoline_6442568724+0x64
0xffffffffb1085cda:ip_protocol_deliver_rcu+0x1ea
0xffffffffb1085d96:ip_local_deliver_finish+0x86
0xffffffffb1085e95:ip_local_deliver+0x65

So, it would surprise users who care about the tracee entry.

[1] https://github.com/bpfsnoop/bpfsnoop

Thanks,
Leon

[...]

^ permalink raw reply

* Re: [PATCH bpf-next v3 0/6] bpf trampoline support "jmp" mode
From: Menglong Dong @ 2026-04-02  7:51 UTC (permalink / raw)
  To: Leon Hwang, ast
  Cc: andrii, bpf, daniel, eddyz87, haoluo, jiang.biao, john.fastabend,
	jolsa, kpsingh, linux-kernel, linux-trace-kernel, mark.rutland,
	martin.lau, mathieu.desnoyers, mhiramat, rostedt, sdf, song,
	yonghong.song
In-Reply-To: <20260402061251.273705-1-leon.hwang@linux.dev>

On Thu, Apr 2, 2026 at 2:13 PM Leon Hwang <leon.hwang@linux.dev> wrote:
>
> On Tue, Nov 18, 2025 at 08:36:28PM +0800, Menglong Dong wrote:
> >For now, the bpf trampoline is called by the "call" instruction. However,
> >it break the RSB and introduce extra overhead in x86_64 arch.
> >
[...]
> >
> >Therefore, we introduce the "jmp" mode for bpf trampoline, as advised by
> >Alexei in [1]. And the logic will become this:
> >  call foo -> jmp trampoline -> call foo-body ->
> >  return foo-body -> return foo
> >
> >As we can see above, the RSB is totally balanced after this series.
> >
>
> Hi, this is a late footnote for this optimization.
>
> As this optimization landed in the 6.19 kernel, the function graph feature
> of bpfsnoop [1] cannot work because of the missing tracee's FP/IP for
> fexit.
>
> Before this optimization,
>
> caller
> -> call icmp_rcv                caller IP/FP
>    -> call trampoline           icmp_rcv IP/FP
>       -> call icmp_rcv body     trampoline IP/FP
>       <- return to trampoline
>    <- return to caller
>
> After this optimization,
>
> caller
> -> call icmp_rcv                caller IP/FP
>    -> jump to trampoline
>       -> call icmp_rcv body     trampoline IP/FP
>       <- return to trampoline
>    <- return to caller

Ah, you are right. The target symbol disappears from the stack
backtrace in the BPF prog, as we jump to the trampoline directly.

We could fake a RIP on the stack that points to the address of the
"function body", so that the target symbol shows up in the stack
backtrace. I have not tried it yet, but I think it should work.

However, I'm not sure if it is worth it.

Thanks!
Menglong Dong

>
> As a result, the function call stack entry for icmp_rcv has gone.
>
> It can be confirmed by bpf_get_stack*() helpers.
>
> $ sudo bpfsnoop -k icmp_rcv --output-stack -v
>
> In 6.14,
>
> 0xffff8000802bda44:bpfsnoop_fn+0x6a4
> 0xffff8000802bda44:bpfsnoop_fn+0x6a4
> 0xffff8000802bd064:bpf_trampoline_6442573163+0xa4
> 0xffffc7825c984df0:icmp_rcv+0x8
> 0xffffc7825c91bcb8:ip_protocol_deliver_rcu+0x48
> 0xffffc7825c91bfd4:ip_local_deliver_finish+0x8c
> 0xffffc7825c91c0d0:ip_local_deliver+0x88
>
> In 6.19,
>
> 0xffffffffc0209069:bpfsnoop_fn+0x449
> 0xffffffffc01ef2a4:bpf_trampoline_6442568724+0x64
> 0xffffffffb1085cda:ip_protocol_deliver_rcu+0x1ea
> 0xffffffffb1085d96:ip_local_deliver_finish+0x86
> 0xffffffffb1085e95:ip_local_deliver+0x65
>
> So, it would surprise users who care about the tracee entry.
>
> [1] https://github.com/bpfsnoop/bpfsnoop
>
> Thanks,
> Leon
>
> [...]

^ permalink raw reply

* [RFC PATCH 0/4] trace, livepatch: Allow kprobe return overriding for livepatched functions
From: Yafang Shao @ 2026-04-02  9:26 UTC (permalink / raw)
  To: jpoimboe, jikos, mbenes, pmladek, joe.lawrence, rostedt, mhiramat,
	mathieu.desnoyers, kpsingh, mattbobrowski, song, jolsa, ast,
	daniel, andrii, martin.lau, eddyz87, memxor, yonghong.song
  Cc: live-patching, linux-kernel, linux-trace-kernel, bpf, Yafang Shao

Livepatching allows for rapid experimentation with new kernel features
without interrupting production workloads. However, static livepatches lack
the flexibility required to tune features based on task-specific attributes,
such as cgroup membership, which is critical in multi-tenant k8s
environments. Furthermore, hardcoding logic into a livepatch prevents
dynamic adjustments based on the runtime environment.

To address this, we propose a hybrid approach using BPF. Our production use
case involves:

1. Deploying a Livepatch function to serve as a stable BPF hook.

2. Utilizing bpf_override_return() to dynamically modify the return value
   of that hook based on the current task's context.

A significant challenge arises when atomic-replace is enabled. In this
mode, deploying a new livepatch changes the target function's address,
forcing a re-attachment of the BPF program. This re-attachment latency is
unacceptable in critical paths, such as those handling networking policies.

To solve this, we introduce a hybrid livepatch mode that allows specific
patches to remain non-replaceable, ensuring the function address remains
stable and the BPF program stays attached.

Furthermore, this mechanism provides a lower-maintenance alternative to
out-of-tree BPF hooks. Given the complexities of upstreaming custom BPF
hooks (e.g., [0], [1]), this hybrid mode allows for the maintenance of
stable, minimal hook points via livepatching with significantly reduced
maintenance burden.

Link: https://lwn.net/Articles/1054030/ [0]
Link: https://lwn.net/Articles/1043548/ [1]

Yafang Shao (4):
  trace: Simplify kprobe overridable function check
  trace: Allow kprobes to override livepatched functions
  livepatch: Add "replaceable" attribute to klp_patch
  livepatch: Implement livepatch hybrid mode

 include/linux/livepatch.h   |  2 ++
 kernel/livepatch/core.c     | 50 +++++++++++++++++++++++++++++++
 kernel/trace/Kconfig        | 14 +++++++++
 kernel/trace/bpf_trace.c    | 14 ++++++---
 kernel/trace/trace_kprobe.c | 49 ++++++++++++------------------
 kernel/trace/trace_probe.h  | 59 +++++++++++++++++++++++++++----------
 6 files changed, 139 insertions(+), 49 deletions(-)

-- 
2.47.3

^ permalink raw reply

* [RFC PATCH 1/4] trace: Simplify kprobe overridable function check
From: Yafang Shao @ 2026-04-02  9:26 UTC (permalink / raw)
  To: jpoimboe, jikos, mbenes, pmladek, joe.lawrence, rostedt, mhiramat,
	mathieu.desnoyers, kpsingh, mattbobrowski, song, jolsa, ast,
	daniel, andrii, martin.lau, eddyz87, memxor, yonghong.song
  Cc: live-patching, linux-kernel, linux-trace-kernel, bpf, Yafang Shao
In-Reply-To: <20260402092607.96430-1-laoar.shao@gmail.com>

Simplify the logic for checking overridable kprobe functions by removing
redundant code.

No functional change.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 kernel/trace/bpf_trace.c    | 13 ++++++---
 kernel/trace/trace_kprobe.c | 40 +++++----------------------
 kernel/trace/trace_probe.h  | 54 ++++++++++++++++++++++++++-----------
 3 files changed, 54 insertions(+), 53 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0b040a417442..c901ace836cb 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1929,10 +1929,15 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
 	 * Kprobe override only works if they are on the function entry,
 	 * and only if they are on the opt-in list.
 	 */
-	if (prog->kprobe_override &&
-	    (!trace_kprobe_on_func_entry(event->tp_event) ||
-	     !trace_kprobe_error_injectable(event->tp_event)))
-		return -EINVAL;
+	if (prog->kprobe_override) {
+		struct trace_kprobe *tp = trace_kprobe_primary_from_call(event->tp_event);
+
+		if (!tp)
+			return -EINVAL;
+		if (!trace_kprobe_on_func_entry(tp) ||
+		    !trace_kprobe_error_injectable(tp))
+			return -EINVAL;
+	}
 
 	mutex_lock(&bpf_event_mutex);
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index a5dbb72528e0..768702674a5c 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -53,17 +53,6 @@ static struct dyn_event_operations trace_kprobe_ops = {
 	.match = trace_kprobe_match,
 };
 
-/*
- * Kprobe event core functions
- */
-struct trace_kprobe {
-	struct dyn_event	devent;
-	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
-	unsigned long __percpu *nhit;
-	const char		*symbol;	/* symbol name */
-	struct trace_probe	tp;
-};
-
 static bool is_trace_kprobe(struct dyn_event *ev)
 {
 	return ev->ops == &trace_kprobe_ops;
@@ -212,33 +201,16 @@ unsigned long trace_kprobe_address(struct trace_kprobe *tk)
 	return addr;
 }
 
-static nokprobe_inline struct trace_kprobe *
-trace_kprobe_primary_from_call(struct trace_event_call *call)
-{
-	struct trace_probe *tp;
-
-	tp = trace_probe_primary_from_call(call);
-	if (WARN_ON_ONCE(!tp))
-		return NULL;
-
-	return container_of(tp, struct trace_kprobe, tp);
-}
-
-bool trace_kprobe_on_func_entry(struct trace_event_call *call)
+bool trace_kprobe_on_func_entry(struct trace_kprobe *tp)
 {
-	struct trace_kprobe *tk = trace_kprobe_primary_from_call(call);
-
-	return tk ? (kprobe_on_func_entry(tk->rp.kp.addr,
-			tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name,
-			tk->rp.kp.addr ? 0 : tk->rp.kp.offset) == 0) : false;
+	return !kprobe_on_func_entry(tp->rp.kp.addr,
+			tp->rp.kp.addr ? NULL : tp->rp.kp.symbol_name,
+			tp->rp.kp.addr ? 0 : tp->rp.kp.offset);
 }
 
-bool trace_kprobe_error_injectable(struct trace_event_call *call)
+bool trace_kprobe_error_injectable(struct trace_kprobe *tp)
 {
-	struct trace_kprobe *tk = trace_kprobe_primary_from_call(call);
-
-	return tk ? within_error_injection_list(trace_kprobe_address(tk)) :
-	       false;
+	return within_error_injection_list(trace_kprobe_address(tp));
 }
 
 static int register_kprobe_event(struct trace_kprobe *tk);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 9fc56c937130..958eb78a9068 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -30,6 +30,7 @@
 
 #include "trace.h"
 #include "trace_output.h"
+#include "trace_dynevent.h"
 
 #define MAX_TRACE_ARGS		128
 #define MAX_ARGSTR_LEN		63
@@ -210,21 +211,6 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(symbol);
 #define ASSIGN_FETCH_TYPE_END {}
 #define MAX_ARRAY_LEN	64
 
-#ifdef CONFIG_KPROBE_EVENTS
-bool trace_kprobe_on_func_entry(struct trace_event_call *call);
-bool trace_kprobe_error_injectable(struct trace_event_call *call);
-#else
-static inline bool trace_kprobe_on_func_entry(struct trace_event_call *call)
-{
-	return false;
-}
-
-static inline bool trace_kprobe_error_injectable(struct trace_event_call *call)
-{
-	return false;
-}
-#endif /* CONFIG_KPROBE_EVENTS */
-
 struct probe_arg {
 	struct fetch_insn	*code;
 	bool			dynamic;/* Dynamic array (string) is used */
@@ -271,6 +257,32 @@ struct event_file_link {
 	struct list_head		list;
 };
 
+/*
+ * Kprobe event core functions
+ */
+struct trace_kprobe {
+	struct dyn_event	devent;
+	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
+	unsigned long __percpu	*nhit;
+	const char		*symbol;	/* symbol name */
+	struct trace_probe	tp;
+};
+
+#ifdef CONFIG_KPROBE_EVENTS
+bool trace_kprobe_on_func_entry(struct trace_kprobe *tp);
+bool trace_kprobe_error_injectable(struct trace_kprobe *tp);
+#else
+static inline bool trace_kprobe_on_func_entry(struct trace_kprobe *tp)
+{
+	return false;
+}
+
+static inline bool trace_kprobe_error_injectable(struct trace_kprobe *tp)
+{
+	return false;
+}
+#endif /* CONFIG_KPROBE_EVENTS */
+
 static inline unsigned int trace_probe_load_flag(struct trace_probe *tp)
 {
 	return smp_load_acquire(&tp->event->flags);
@@ -329,6 +341,18 @@ trace_probe_primary_from_call(struct trace_event_call *call)
 	return list_first_entry_or_null(&tpe->probes, struct trace_probe, list);
 }
 
+static nokprobe_inline struct trace_kprobe *
+trace_kprobe_primary_from_call(struct trace_event_call *call)
+{
+	struct trace_probe *tp;
+
+	tp = trace_probe_primary_from_call(call);
+	if (WARN_ON_ONCE(!tp))
+		return NULL;
+
+	return container_of(tp, struct trace_kprobe, tp);
+}
+
 static inline struct list_head *trace_probe_probe_list(struct trace_probe *tp)
 {
 	return &tp->event->probes;
-- 
2.47.3


^ permalink raw reply related

* [RFC PATCH 2/4] trace: Allow kprobes to override livepatched functions
From: Yafang Shao @ 2026-04-02  9:26 UTC (permalink / raw)
  To: jpoimboe, jikos, mbenes, pmladek, joe.lawrence, rostedt, mhiramat,
	mathieu.desnoyers, kpsingh, mattbobrowski, song, jolsa, ast,
	daniel, andrii, martin.lau, eddyz87, memxor, yonghong.song
  Cc: live-patching, linux-kernel, linux-trace-kernel, bpf, Yafang Shao
In-Reply-To: <20260402092607.96430-1-laoar.shao@gmail.com>

Introduce the ability for kprobes to override the return values of
functions that have been livepatched. This functionality is guarded by the
CONFIG_KPROBE_OVERRIDE_KLP_FUNC configuration option.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 kernel/trace/Kconfig        | 14 ++++++++++++++
 kernel/trace/bpf_trace.c    |  3 ++-
 kernel/trace/trace_kprobe.c | 17 +++++++++++++++++
 kernel/trace/trace_probe.h  |  5 +++++
 4 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 49de13cae428..db712c8cb745 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1279,6 +1279,20 @@ config HIST_TRIGGERS_DEBUG
 
           If unsure, say N.
 
+config KPROBE_OVERRIDE_KLP_FUNC
+	bool "Allow kprobes to override livepatched functions"
+	depends on KPROBES && LIVEPATCH
+	help
+	  This option allows BPF programs to use kprobes to override functions
+	  that have already been patched by Livepatch (KLP).
+
+	  Enabling this provides a mechanism to dynamically control execution
+	  flow without requiring a reboot or a new livepatch module. It
+	  effectively combines the persistence of livepatching with the
+	  programmability of BPF.
+
+	  If unsure, say N.
+
 source "kernel/trace/rv/Kconfig"
 
 endif # FTRACE
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c901ace836cb..08ae2b1a912c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1935,7 +1935,8 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
 		if (!tp)
 			return -EINVAL;
 		if (!trace_kprobe_on_func_entry(tp) ||
-		    !trace_kprobe_error_injectable(tp))
+		    (!trace_kprobe_error_injectable(tp) &&
+		     !trace_kprobe_klp_func_overridable(tp)))
 			return -EINVAL;
 	}
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 768702674a5c..6f05451fbc76 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -213,6 +213,23 @@ bool trace_kprobe_error_injectable(struct trace_kprobe *tp)
 	return within_error_injection_list(trace_kprobe_address(tp));
 }
 
+bool trace_kprobe_klp_func_overridable(struct trace_kprobe *tp)
+{
+	bool overridable = false;
+#ifdef CONFIG_KPROBE_OVERRIDE_KLP_FUNC
+	struct module *mod;
+	unsigned long addr;
+
+	addr = trace_kprobe_address(tp);
+	rcu_read_lock();
+	mod = __module_address(addr);
+	if (mod && mod->klp)
+		overridable = true;
+	rcu_read_unlock();
+#endif
+	return overridable;
+}
+
 static int register_kprobe_event(struct trace_kprobe *tk);
 static int unregister_kprobe_event(struct trace_kprobe *tk);
 
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 958eb78a9068..84bd2617db7c 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -271,6 +271,7 @@ struct trace_kprobe {
 #ifdef CONFIG_KPROBE_EVENTS
 bool trace_kprobe_on_func_entry(struct trace_kprobe *tp);
 bool trace_kprobe_error_injectable(struct trace_kprobe *tp);
+bool trace_kprobe_klp_func_overridable(struct trace_kprobe *tp);
 #else
 static inline bool trace_kprobe_on_func_entry(struct trace_kprobe *tp)
 {
@@ -281,6 +282,10 @@ static inline bool trace_kprobe_error_injectable(struct trace_kprobe *tp)
 {
 	return false;
 }
+static inline bool trace_kprobe_klp_func_overridable(struct trace_kprobe *tp)
+{
+	return false;
+}
 #endif /* CONFIG_KPROBE_EVENTS */
 
 static inline unsigned int trace_probe_load_flag(struct trace_probe *tp)
-- 
2.47.3


^ permalink raw reply related

* [RFC PATCH 3/4] livepatch: Add "replaceable" attribute to klp_patch
From: Yafang Shao @ 2026-04-02  9:26 UTC (permalink / raw)
  To: jpoimboe, jikos, mbenes, pmladek, joe.lawrence, rostedt, mhiramat,
	mathieu.desnoyers, kpsingh, mattbobrowski, song, jolsa, ast,
	daniel, andrii, martin.lau, eddyz87, memxor, yonghong.song
  Cc: live-patching, linux-kernel, linux-trace-kernel, bpf, Yafang Shao
In-Reply-To: <20260402092607.96430-1-laoar.shao@gmail.com>

Add a new replaceable attribute to allow the coexistence of both
atomic-replace and non-atomic-replace livepatches. If replaceable is set to
0, the livepatch will not be replaced by a subsequent atomic-replace
operation.

This is a preparatory patch for following changes.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/linux/livepatch.h |  2 ++
 kernel/livepatch/core.c   | 44 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index ba9e3988c07c..d88a6966e5f2 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -124,6 +124,7 @@ struct klp_state {
  * @objs:	object entries for kernel objects to be patched
  * @states:	system states that can get modified
  * @replace:	replace all actively used patches
+ * @replaceable:	whether this patch can be replaced or not
  * @list:	list node for global list of actively used patches
  * @kobj:	kobject for sysfs resources
  * @obj_list:	dynamic list of the object entries
@@ -138,6 +139,7 @@ struct klp_patch {
 	struct klp_object *objs;
 	struct klp_state *states;
 	bool replace;
+	bool replaceable;
 
 	/* internal */
 	struct list_head list;
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 28d15ba58a26..04f9e84f114f 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -351,6 +351,7 @@ int klp_apply_section_relocs(struct module *pmod, Elf_Shdr *sechdrs,
  * /sys/kernel/livepatch/<patch>/transition
  * /sys/kernel/livepatch/<patch>/force
  * /sys/kernel/livepatch/<patch>/replace
+ * /sys/kernel/livepatch/<patch>/replaceable
  * /sys/kernel/livepatch/<patch>/stack_order
  * /sys/kernel/livepatch/<patch>/<object>
  * /sys/kernel/livepatch/<patch>/<object>/patched
@@ -478,17 +479,60 @@ static ssize_t stack_order_show(struct kobject *kobj,
 	return sysfs_emit(buf, "%d\n", stack_order);
 }
 
+static ssize_t replaceable_store(struct kobject *kobj, struct kobj_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct klp_patch *patch;
+	bool replaceable;
+	int ret;
+
+	ret = kstrtobool(buf, &replaceable);
+	if (ret)
+		return ret;
+
+	patch = container_of(kobj, struct klp_patch, kobj);
+
+	mutex_lock(&klp_mutex);
+
+	if (patch->replaceable == replaceable)
+		goto out;
+
+	if (patch == klp_transition_patch) {
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	patch->replaceable = replaceable;
+
+out:
+	mutex_unlock(&klp_mutex);
+
+	if (ret)
+		return ret;
+	return count;
+}
+static ssize_t replaceable_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	struct klp_patch *patch;
+
+	patch = container_of(kobj, struct klp_patch, kobj);
+	return sysfs_emit(buf, "%d\n", patch->replaceable);
+}
+
 static struct kobj_attribute enabled_kobj_attr = __ATTR_RW(enabled);
 static struct kobj_attribute transition_kobj_attr = __ATTR_RO(transition);
 static struct kobj_attribute force_kobj_attr = __ATTR_WO(force);
 static struct kobj_attribute replace_kobj_attr = __ATTR_RO(replace);
 static struct kobj_attribute stack_order_kobj_attr = __ATTR_RO(stack_order);
+static struct kobj_attribute replaceable_kobj_attr = __ATTR_RW(replaceable);
 static struct attribute *klp_patch_attrs[] = {
 	&enabled_kobj_attr.attr,
 	&transition_kobj_attr.attr,
 	&force_kobj_attr.attr,
 	&replace_kobj_attr.attr,
 	&stack_order_kobj_attr.attr,
+	&replaceable_kobj_attr.attr,
 	NULL
 };
 ATTRIBUTE_GROUPS(klp_patch);
-- 
2.47.3


^ permalink raw reply related

* [RFC PATCH 4/4] livepatch: Implement livepatch hybrid mode
From: Yafang Shao @ 2026-04-02  9:26 UTC (permalink / raw)
  To: jpoimboe, jikos, mbenes, pmladek, joe.lawrence, rostedt, mhiramat,
	mathieu.desnoyers, kpsingh, mattbobrowski, song, jolsa, ast,
	daniel, andrii, martin.lau, eddyz87, memxor, yonghong.song
  Cc: live-patching, linux-kernel, linux-trace-kernel, bpf, Yafang Shao
In-Reply-To: <20260402092607.96430-1-laoar.shao@gmail.com>

Livepatching allows for rapid experimentation with new kernel features
without interrupting production workloads. However, static livepatches lack
the flexibility required to tune features based on task-specific attributes,
such as cgroup membership, which is critical in multi-tenant k8s
environments. Furthermore, hardcoding logic into a livepatch prevents
dynamic adjustments based on the runtime environment.

To address this, we propose a hybrid approach using BPF. Our production use
case involves:

1. Deploying a Livepatch function to serve as a stable BPF hook.

2. Utilizing bpf_override_return() to dynamically modify the return value
   of that hook based on the current task's context.

A significant challenge arises when atomic-replace is enabled. In this
mode, deploying a new livepatch changes the target function's address,
forcing a re-attachment of the BPF program. This re-attachment latency is
unacceptable in critical paths, such as those handling networking policies.

To solve this, we introduce a hybrid livepatch mode that allows specific
patches to remain non-replaceable, ensuring the function address remains
stable and the BPF program stays attached.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 kernel/livepatch/core.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 04f9e84f114f..5a44154131c8 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -665,6 +665,8 @@ static int klp_add_nops(struct klp_patch *patch)
 		klp_for_each_object(old_patch, old_obj) {
 			int err;

+			if (!old_patch->replaceable)
+				continue;
 			err = klp_add_object_nops(patch, old_obj);
 			if (err)
 				return err;
@@ -837,6 +839,8 @@ void klp_free_replaced_patches_async(struct klp_patch *new_patch)
 	klp_for_each_patch_safe(old_patch, tmp_patch) {
 		if (old_patch == new_patch)
 			return;
+		if (!old_patch->replaceable)
+			continue;
 		klp_free_patch_async(old_patch);
 	}
 }
@@ -1239,6 +1243,8 @@ void klp_unpatch_replaced_patches(struct klp_patch *new_patch)
 		if (old_patch == new_patch)
 			return;

+		if (!old_patch->replaceable)
+			continue;
 		old_patch->enabled = false;
 		klp_unpatch_objects(old_patch);
 	}
-- 
2.47.3

^ permalink raw reply related

* Re: [PATCH bpf v3 2/2] selftests/bpf: Add test to ensure kprobe_multi is not sleepable
From: Jiri Olsa @ 2026-04-02  9:46 UTC (permalink / raw)
  To: Kumar Kartikeya Dwivedi
  Cc: Varun R Mallya, bpf, ast, daniel, yonghong.song, rostedt,
	mhiramat, linux-kernel, linux-trace-kernel
In-Reply-To: <CAP01T74cudrCFGAJhhNUWdCS+D1Gn5yFNccaS85YcoX8vdgzBQ@mail.gmail.com>

On Thu, Apr 02, 2026 at 12:50:10AM +0200, Kumar Kartikeya Dwivedi wrote:
> On Wed, 1 Apr 2026 at 21:11, Varun R Mallya <varunrmallya@gmail.com> wrote:
> >
> > Add a selftest to ensure that kprobe_multi programs cannot be attached
> > using the BPF_F_SLEEPABLE flag. This test succeeds when the kernel
> > rejects attachment of kprobe_multi when the BPF_F_SLEEPABLE flag is set.
> >
> > Signed-off-by: Varun R Mallya <varunrmallya@gmail.com>
> > ---
> 
> Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
> 
> >  .../bpf/prog_tests/kprobe_multi_test.c        | 41 +++++++++++++++++++
> >  .../bpf/progs/kprobe_multi_sleepable.c        | 13 ++++++
> >  2 files changed, 54 insertions(+)
> >  create mode 100644 tools/testing/selftests/bpf/progs/kprobe_multi_sleepable.c
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
> > index 78c974d4ea33..f02fec2b6fda 100644
> > --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
> > +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
> > @@ -10,6 +10,7 @@
> >  #include "kprobe_multi_session_cookie.skel.h"
> >  #include "kprobe_multi_verifier.skel.h"
> >  #include "kprobe_write_ctx.skel.h"
> > +#include "kprobe_multi_sleepable.skel.h"
> >  #include "bpf/libbpf_internal.h"
> >  #include "bpf/hashmap.h"
> >
> > @@ -633,6 +634,44 @@ static void test_attach_write_ctx(void)
> >  }
> >  #endif
> >
> > +static void test_attach_multi_sleepable(void)
> > +{
> > +       struct kprobe_multi_sleepable *skel;
> > +       int err;
> > +
> > +       skel = kprobe_multi_sleepable__open();
> > +       if (!ASSERT_OK_PTR(skel, "kprobe_multi_sleepable__open"))
> > +               return;
> > +
> > +       err = bpf_program__set_flags(skel->progs.handle_kprobe_multi_sleepable,
> > +                                    BPF_F_SLEEPABLE);
> > +       if (!ASSERT_OK(err, "bpf_program__set_flags"))
> > +               goto cleanup;
> > +
> > +       /* Load should succeed even with BPF_F_SLEEPABLE for KPROBE types */
> > +       err = kprobe_multi_sleepable__load(skel);
> > +       if (!ASSERT_OK(err, "kprobe_multi_sleepable__load"))
> > +               goto cleanup;
> > +
> > +       /* Attachment must fail for kprobe.multi + BPF_F_SLEEPABLE.
> > +        * Also chosen a stable symbol to send into opts
> > +        */
> > +       LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
> > +       const char *sym = "vfs_read";
> > +
> > +       opts.syms = &sym;
> > +       opts.cnt = 1;
> > +
> > +       skel->links.handle_kprobe_multi_sleepable =
> > +               bpf_program__attach_kprobe_multi_opts(skel->progs.handle_kprobe_multi_sleepable,
> > +                                                     NULL, &opts);
> > +       ASSERT_ERR_PTR(skel->links.handle_kprobe_multi_sleepable,
> > +                      "bpf_program__attach_kprobe_multi_opts");
> 
> Nit: While vfs_read will likely remain stable, the check could
> probably be stronger to distinguish an attach error from -EINVAL?
> I added a typo to vfs_read and it still passed, because it failed to
> attach instead of getting rejected on unfixed kernel.
> May not be a big deal since vfs_read is unlikely to break.
> I verified it works by adding bpf_copy_from_user to the program and
> attaching to SYS_PREFIX sys_getpid and invoking the splat though, so
> LGTM otherwise.

why not use bpf_fentry_test2 ? you could also put it in pattern argument
and bypass opts completely (up to you)

also there's test_attach_api_fails test, please move it over there

thanks,
jirka

^ permalink raw reply

* Re: [PATCH bpf v3 1/2] bpf: Reject sleepable kprobe_multi programs at attach time
From: Jiri Olsa @ 2026-04-02  9:47 UTC (permalink / raw)
  To: Varun R Mallya
  Cc: bpf, ast, daniel, memxor, yonghong.song, rostedt, mhiramat,
	linux-kernel, linux-trace-kernel
In-Reply-To: <20260401191126.440683-1-varunrmallya@gmail.com>

On Thu, Apr 02, 2026 at 12:41:25AM +0530, Varun R Mallya wrote:
> kprobe.multi programs run in atomic/RCU context and cannot sleep.
> However, bpf_kprobe_multi_link_attach() did not validate whether the
> program being attached had the sleepable flag set, allowing sleepable
> helpers such as bpf_copy_from_user() to be invoked from a non-sleepable
> context.
> 
> This causes a "sleeping function called from invalid context" splat:
> 
>   BUG: sleeping function called from invalid context at ./include/linux/uaccess.h:169
>   in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1787, name: sudo
>   preempt_count: 1, expected: 0
>   RCU nest depth: 2, expected: 0
> 
> Fix this by rejecting sleepable programs early in
> bpf_kprobe_multi_link_attach(), before any further processing.
> 
> Fixes: 0dcac2725406 ("bpf: Add multi kprobe link")
> Signed-off-by: Varun R Mallya <varunrmallya@gmail.com>

nice catch!

Acked-by: Jiri Olsa <jolsa@kernel.org>

thanks,
jirka


> ---
>  kernel/trace/bpf_trace.c | 4 ++++
>  1 file changed, 4 insertions(+)
> 
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 0b040a417442..af7079aa0f36 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -2752,6 +2752,10 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
>  	if (!is_kprobe_multi(prog))
>  		return -EINVAL;
>  
> +	/* kprobe_multi is not allowed to be sleepable. */
> +	if (prog->sleepable)
> +		return -EINVAL;
> +
>  	/* Writing to context is not allowed for kprobes. */
>  	if (prog->aux->kprobe_write_ctx)
>  		return -EINVAL;
> -- 
> 2.53.0
> 

^ permalink raw reply

* Re: [RFC PATCH 2/4] trace: Allow kprobes to override livepatched functions
From: Menglong Dong @ 2026-04-02 12:48 UTC (permalink / raw)
  To: Yafang Shao
  Cc: jpoimboe, jikos, mbenes, pmladek, joe.lawrence, rostedt, mhiramat,
	mathieu.desnoyers, kpsingh, mattbobrowski, song, jolsa, ast,
	daniel, andrii, martin.lau, eddyz87, memxor, yonghong.song,
	Yafang Shao, live-patching, linux-kernel, linux-trace-kernel, bpf
In-Reply-To: <20260402092607.96430-3-laoar.shao@gmail.com>

On 2026/4/2 17:26, Yafang Shao wrote:
> Introduce the ability for kprobes to override the return values of
> functions that have been livepatched. This functionality is guarded by the
> CONFIG_KPROBE_OVERRIDE_KLP_FUNC configuration option.

Hi, Yafang. This is an interesting idea.

For now, bpf_override_return() can only be used on kernel
functions that allow error injection, to prevent the BPF program from
crashing the kernel. If we use it on kernel functions that are patched
by KLP, we can easily crash the kernel by returning an invalid value
with bpf_override_return(), right? (Of course, we can crash the kernel
easily with KLP too ;)

I haven't figured out the use case yet. Can KLP be used together with
a BPF program that uses bpf_override_return()? KLP will modify
the RIP on the stack, and bpf_override_return() will modify it too.
AFAIK, there can't be two ftrace_ops that both have the
FTRACE_OPS_FL_IPMODIFY flag. Did I miss something?

It would help me understand the use case if a selftest were
provided :)

BTW, if we allow the use of bpf_override_return() on KLP-patched
functions, we should allow the use of BPF_MODIFY_RETURN in this
case too, right?

Thanks!
Menglong Dong

> 
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> ---
>  kernel/trace/Kconfig        | 14 ++++++++++++++
>  kernel/trace/bpf_trace.c    |  3 ++-
>  kernel/trace/trace_kprobe.c | 17 +++++++++++++++++
>  kernel/trace/trace_probe.h  |  5 +++++
>  4 files changed, 38 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 49de13cae428..db712c8cb745 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -1279,6 +1279,20 @@ config HIST_TRIGGERS_DEBUG
>  
>            If unsure, say N.
>  
> +config KPROBE_OVERRIDE_KLP_FUNC
> +	bool "Allow kprobes to override livepatched functions"
> +	depends on KPROBES && LIVEPATCH
> +	help
> +	  This option allows BPF programs to use kprobes to override functions
> +	  that have already been patched by Livepatch (KLP).
> +
> +	  Enabling this provides a mechanism to dynamically control execution
> +	  flow without requiring a reboot or a new livepatch module. It
> +	  effectively combines the persistence of livepatching with the
> +	  programmability of BPF.
> +
> +	  If unsure, say N.
> +
>  source "kernel/trace/rv/Kconfig"
>  
>  endif # FTRACE
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index c901ace836cb..08ae2b1a912c 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1935,7 +1935,8 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
>  		if (!tp)
>  			return -EINVAL;
>  		if (!trace_kprobe_on_func_entry(tp) ||
> -		    !trace_kprobe_error_injectable(tp))
> +		    (!trace_kprobe_error_injectable(tp) &&
> +		     !trace_kprobe_klp_func_overridable(tp)))
>  			return -EINVAL;
>  	}
>  
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index 768702674a5c..6f05451fbc76 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -213,6 +213,23 @@ bool trace_kprobe_error_injectable(struct trace_kprobe *tp)
>  	return within_error_injection_list(trace_kprobe_address(tp));
>  }
>  
> +bool trace_kprobe_klp_func_overridable(struct trace_kprobe *tp)
> +{
> +	bool overridable = false;
> +#ifdef CONFIG_KPROBE_OVERRIDE_KLP_FUNC
> +	struct module *mod;
> +	unsigned long addr;
> +
> +	addr = trace_kprobe_address(tp);
> +	rcu_read_lock();
> +	mod = __module_address(addr);
> +	if (mod && mod->klp)
> +		overridable = true;
> +	rcu_read_unlock();
> +#endif
> +	return overridable;
> +}
> +
>  static int register_kprobe_event(struct trace_kprobe *tk);
>  static int unregister_kprobe_event(struct trace_kprobe *tk);
>  
> diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
> index 958eb78a9068..84bd2617db7c 100644
> --- a/kernel/trace/trace_probe.h
> +++ b/kernel/trace/trace_probe.h
> @@ -271,6 +271,7 @@ struct trace_kprobe {
>  #ifdef CONFIG_KPROBE_EVENTS
>  bool trace_kprobe_on_func_entry(struct trace_kprobe *tp);
>  bool trace_kprobe_error_injectable(struct trace_kprobe *tp);
> +bool trace_kprobe_klp_func_overridable(struct trace_kprobe *tp);
>  #else
>  static inline bool trace_kprobe_on_func_entry(struct trace_kprobe *tp)
>  {
> @@ -281,6 +282,10 @@ static inline bool trace_kprobe_error_injectable(struct trace_kprobe *tp)
>  {
>  	return false;
>  }
> +static inline bool trace_kprobe_klp_func_overridable(struct trace_kprobe *tp)
> +{
> +	return false;
> +}
>  #endif /* CONFIG_KPROBE_EVENTS */
>  
>  static inline unsigned int trace_probe_load_flag(struct trace_probe *tp)
> 





^ permalink raw reply

* Re: [RFC PATCH 1/4] trace: Simplify kprobe overridable function check
From: Masami Hiramatsu @ 2026-04-02 13:13 UTC (permalink / raw)
  To: Yafang Shao
  Cc: jpoimboe, jikos, mbenes, pmladek, joe.lawrence, rostedt,
	mathieu.desnoyers, kpsingh, mattbobrowski, song, jolsa, ast,
	daniel, andrii, martin.lau, eddyz87, memxor, yonghong.song,
	live-patching, linux-kernel, linux-trace-kernel, bpf
In-Reply-To: <20260402092607.96430-2-laoar.shao@gmail.com>

On Thu,  2 Apr 2026 17:26:04 +0800
Yafang Shao <laoar.shao@gmail.com> wrote:

> Simplify the logic for checking overridable kprobe functions by removing
> redundant code.
> 
> No functional change.

NACK.

trace_kprobe must be hidden inside trace_kprobe.c. It is not
designed to be exposed.

Thank you,

> 
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> ---
>  kernel/trace/bpf_trace.c    | 13 ++++++---
>  kernel/trace/trace_kprobe.c | 40 +++++----------------------
>  kernel/trace/trace_probe.h  | 54 ++++++++++++++++++++++++++-----------
>  3 files changed, 54 insertions(+), 53 deletions(-)
> 
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 0b040a417442..c901ace836cb 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1929,10 +1929,15 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
>  	 * Kprobe override only works if they are on the function entry,
>  	 * and only if they are on the opt-in list.
>  	 */
> -	if (prog->kprobe_override &&
> -	    (!trace_kprobe_on_func_entry(event->tp_event) ||
> -	     !trace_kprobe_error_injectable(event->tp_event)))
> -		return -EINVAL;
> +	if (prog->kprobe_override) {
> +		struct trace_kprobe *tp = trace_kprobe_primary_from_call(event->tp_event);
> +
> +		if (!tp)
> +			return -EINVAL;
> +		if (!trace_kprobe_on_func_entry(tp) ||
> +		    !trace_kprobe_error_injectable(tp))
> +			return -EINVAL;
> +	}
>  
>  	mutex_lock(&bpf_event_mutex);
>  
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index a5dbb72528e0..768702674a5c 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -53,17 +53,6 @@ static struct dyn_event_operations trace_kprobe_ops = {
>  	.match = trace_kprobe_match,
>  };
>  
> -/*
> - * Kprobe event core functions
> - */
> -struct trace_kprobe {
> -	struct dyn_event	devent;
> -	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
> -	unsigned long __percpu *nhit;
> -	const char		*symbol;	/* symbol name */
> -	struct trace_probe	tp;
> -};
> -
>  static bool is_trace_kprobe(struct dyn_event *ev)
>  {
>  	return ev->ops == &trace_kprobe_ops;
> @@ -212,33 +201,16 @@ unsigned long trace_kprobe_address(struct trace_kprobe *tk)
>  	return addr;
>  }
>  
> -static nokprobe_inline struct trace_kprobe *
> -trace_kprobe_primary_from_call(struct trace_event_call *call)
> -{
> -	struct trace_probe *tp;
> -
> -	tp = trace_probe_primary_from_call(call);
> -	if (WARN_ON_ONCE(!tp))
> -		return NULL;
> -
> -	return container_of(tp, struct trace_kprobe, tp);
> -}
> -
> -bool trace_kprobe_on_func_entry(struct trace_event_call *call)
> +bool trace_kprobe_on_func_entry(struct trace_kprobe *tp)
>  {
> -	struct trace_kprobe *tk = trace_kprobe_primary_from_call(call);
> -
> -	return tk ? (kprobe_on_func_entry(tk->rp.kp.addr,
> -			tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name,
> -			tk->rp.kp.addr ? 0 : tk->rp.kp.offset) == 0) : false;
> +	return !kprobe_on_func_entry(tp->rp.kp.addr,
> +			tp->rp.kp.addr ? NULL : tp->rp.kp.symbol_name,
> +			tp->rp.kp.addr ? 0 : tp->rp.kp.offset);
>  }
>  
> -bool trace_kprobe_error_injectable(struct trace_event_call *call)
> +bool trace_kprobe_error_injectable(struct trace_kprobe *tp)
>  {
> -	struct trace_kprobe *tk = trace_kprobe_primary_from_call(call);
> -
> -	return tk ? within_error_injection_list(trace_kprobe_address(tk)) :
> -	       false;
> +	return within_error_injection_list(trace_kprobe_address(tp));
>  }
>  
>  static int register_kprobe_event(struct trace_kprobe *tk);
> diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
> index 9fc56c937130..958eb78a9068 100644
> --- a/kernel/trace/trace_probe.h
> +++ b/kernel/trace/trace_probe.h
> @@ -30,6 +30,7 @@
>  
>  #include "trace.h"
>  #include "trace_output.h"
> +#include "trace_dynevent.h"
>  
>  #define MAX_TRACE_ARGS		128
>  #define MAX_ARGSTR_LEN		63
> @@ -210,21 +211,6 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(symbol);
>  #define ASSIGN_FETCH_TYPE_END {}
>  #define MAX_ARRAY_LEN	64
>  
> -#ifdef CONFIG_KPROBE_EVENTS
> -bool trace_kprobe_on_func_entry(struct trace_event_call *call);
> -bool trace_kprobe_error_injectable(struct trace_event_call *call);
> -#else
> -static inline bool trace_kprobe_on_func_entry(struct trace_event_call *call)
> -{
> -	return false;
> -}
> -
> -static inline bool trace_kprobe_error_injectable(struct trace_event_call *call)
> -{
> -	return false;
> -}
> -#endif /* CONFIG_KPROBE_EVENTS */
> -
>  struct probe_arg {
>  	struct fetch_insn	*code;
>  	bool			dynamic;/* Dynamic array (string) is used */
> @@ -271,6 +257,32 @@ struct event_file_link {
>  	struct list_head		list;
>  };
>  
> +/*
> + * Kprobe event core functions
> + */
> +struct trace_kprobe {
> +	struct dyn_event	devent;
> +	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
> +	unsigned long __percpu	*nhit;
> +	const char		*symbol;	/* symbol name */
> +	struct trace_probe	tp;
> +};
> +
> +#ifdef CONFIG_KPROBE_EVENTS
> +bool trace_kprobe_on_func_entry(struct trace_kprobe *tp);
> +bool trace_kprobe_error_injectable(struct trace_kprobe *tp);
> +#else
> +static inline bool trace_kprobe_on_func_entry(struct trace_kprobe *tp)
> +{
> +	return false;
> +}
> +
> +static inline bool trace_kprobe_error_injectable(struct trace_kprobe *tp)
> +{
> +	return false;
> +}
> +#endif /* CONFIG_KPROBE_EVENTS */
> +
>  static inline unsigned int trace_probe_load_flag(struct trace_probe *tp)
>  {
>  	return smp_load_acquire(&tp->event->flags);
> @@ -329,6 +341,18 @@ trace_probe_primary_from_call(struct trace_event_call *call)
>  	return list_first_entry_or_null(&tpe->probes, struct trace_probe, list);
>  }
>  
> +static nokprobe_inline struct trace_kprobe *
> +trace_kprobe_primary_from_call(struct trace_event_call *call)
> +{
> +	struct trace_probe *tp;
> +
> +	tp = trace_probe_primary_from_call(call);
> +	if (WARN_ON_ONCE(!tp))
> +		return NULL;
> +
> +	return container_of(tp, struct trace_kprobe, tp);
> +}
> +
>  static inline struct list_head *trace_probe_probe_list(struct trace_probe *tp)
>  {
>  	return &tp->event->probes;
> -- 
> 2.47.3
> 


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply

* Re: [PATCH v9 2/3] tracing: Remove the backup instance automatically after read
From: Masami Hiramatsu @ 2026-04-02 13:19 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Mathieu Desnoyers, linux-kernel, linux-trace-kernel
In-Reply-To: <20260401104001.5461c5f0@gandalf.local.home>

On Wed, 1 Apr 2026 10:40:01 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Wed, 1 Apr 2026 12:19:57 +0900
> Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> 
> > > 
> > >   CPU 0							CPU 1
> > >   -----							-----
> > >   open(trace_pipe);
> > >   read(..);
> > >   close(trace_pipe);
> > >      kick the work queue to delete it....
> > > 						rmdir();
> > > 							[instance deleted]  
> > 
> > I thought this requires trace_types_lock, and after kicked the queue,
> > can rmdir() gets the tr? (__trace_array_get() return error if
> > tr->free_on_close is set)
> 
> rmdir() doesn't use __trace_array_get(), it uses trace_array_find() which
> we shouldn't need to modify.
> 
> static int instance_rmdir(const char *name)
> {
> 	struct trace_array *tr;
> 
> 	guard(mutex)(&event_mutex);
> 	guard(mutex)(&trace_types_lock);
> 
> 	tr = trace_array_find(name);
> 	if (!tr)
> 		return -ENODEV;
> 
> 	return __remove_instance(tr);
> }

Oops, OK it must be updated too.

Thanks,

> 
> > 
> > > 
> > >   __remove_instance();
> > > 
> > >    [ now the tr is freed, and the remove will crash!]
> > > 
> > > 
> > > What would prevent this is this is to use trace_array_destroy() that checks
> > > this and also adds the proper locking:
> > > 
> > > static void trace_array_autoremove(struct work_struct *work)
> > > {
> > > 	struct trace_array *tr = container_of(work, struct trace_array, autoremove_work);
> > > 
> > > 	trace_array_destroy(tr);
> > > }  
> > 
> > OK, let's use it.
> 
> Yes, by using trace_array_destroy(), it will fix this.
> 
> Thanks,
> 
> -- Steve


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply

* Re: [RFC PATCH 2/4] trace: Allow kprobes to override livepatched functions
From: Yafang Shao @ 2026-04-02 13:20 UTC (permalink / raw)
  To: Menglong Dong
  Cc: jpoimboe, jikos, mbenes, pmladek, joe.lawrence, rostedt, mhiramat,
	mathieu.desnoyers, kpsingh, mattbobrowski, song, jolsa, ast,
	daniel, andrii, martin.lau, eddyz87, memxor, yonghong.song,
	live-patching, linux-kernel, linux-trace-kernel, bpf
In-Reply-To: <2261072.irdbgypaU6@7950hx>

On Thu, Apr 2, 2026 at 8:48 PM Menglong Dong <menglong.dong@linux.dev> wrote:
>
> On 2026/4/2 17:26, Yafang Shao wrote:
> > Introduce the ability for kprobes to override the return values of
> > functions that have been livepatched. This functionality is guarded by the
> > CONFIG_KPROBE_OVERRIDE_KLP_FUNC configuration option.
>
> Hi, Yafang. This is a interesting idea.
>
> For now, the bpf_override_return() can only be used on the kernel
> functions that allow error injection to prevent the BPF program from
> crash the kernel. If we use it on the kernel functions that patched
> by the KLP, we can crash the kernel easily by return a invalid value
> with bpf_override_return(), right? (Of course, we can crash the kernel
> easily with KLP too ;)

Right.
Livepatch already grants the power to modify the kernel at will;
allowing BPF to override a patched function simply adds a layer of
runtime programmability to an existing modification.

>
> I haven't figure out the use case yet. Can KLP be used together with
> the BPF program that use bpf_override_return()?

The two mechanisms do not target the same entry point: while KLP
modifies the original kernel function, bpf_override_return() is
applied to the newly patched function provided by the KLP module.

> The KLP will modify
> the RIP on the stack, and the bpf_override_return() will modify it too.
> AFAIK, there can't be two ftrace_ops that both have the
> FTRACE_OPS_FL_IPMODIFY flag. Did I miss something?

Correct, but as noted, they target different functions.

>
> It will be helpful for me to understand the use case if a selftests is
> offered :)

Here is a recent use case from our production environment.

- The livepatch

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e378bbe5705f..047e937bfa6d 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -5175,12 +5175,22 @@ int bond_update_slave_arr(struct bonding
*bond, struct slave *skipslave)
        return ret;
 }

+/* noclone to avoid bond_get_slave_hook.constprop.0 */
+__attribute__((__noclone__, __noinline__))
+int bond_get_slave_hook(struct sk_buff *skb, u32 hash, unsigned int count)
+{
+       return -1;
+}

 static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond,
                                                 struct sk_buff *skb,
                                                 struct bond_up_slave *slaves)
 {
        struct slave *slave;
        unsigned int count;
+       int slave_idx;
        u32 hash;

        hash = bond_xmit_hash(bond, skb);
@@ -5188,6 +5198,13 @@ static struct slave
*bond_xmit_3ad_xor_slave_get(struct bonding *bond,
        if (unlikely(!count))
                return NULL;

+       /* Try BPF hook first - returns slave index directly */
+       slave_idx = bond_get_slave_hook(skb, hash, count);
+       /* If BPF hook returned valid slave index, use it */
+       if (slave_idx >= 0 && slave_idx < count) {
+               slave = slaves->arr[slave_idx];
+               return slave;
+       }
        slave = slaves->arr[hash % count];
        return slave;
 }

- The BPF program

/*
 * kprobe program attached to bond_get_slave_hook(). It looks up the
 * packet's destination address in ip_slave_map and, when the map holds an
 * index in the range [0, count), overrides the hooked function's return
 * value with that index. In every other case it returns without overriding.
 */
SEC("kprobe/bond_get_slave_hook")
int BPF_KPROBE(slave_selector, struct sk_buff *skb, u32 hash, u32 count)
{
        unsigned short net_hdr_off;
        unsigned char *head;
        struct iphdr iph;
        int *slave_idx;
        __u32 daddr;

        /* Only handle protocol 0x0800 (presumably the IPv4 EtherType, given the iphdr read below). */
        __u16 proto = BPF_CORE_READ(skb, protocol);
        if (proto != bpf_htons(0x0800))
                return 0;

        /* The header is read from skb->head + skb->network_header. */
        head = BPF_CORE_READ(skb, head);
        net_hdr_off = BPF_CORE_READ(skb, network_header);

        /* Copy the header out of kernel memory; give up if the read fails. */
        if (bpf_probe_read_kernel(&iph, sizeof(iph), head + net_hdr_off) != 0)
                return 0;

        /* ip_slave_map is defined elsewhere; it is keyed here by the destination address. */
        daddr = iph.daddr;
        slave_idx = bpf_map_lookup_elem(&ip_slave_map, &daddr);
        if (slave_idx) {
                int idx = *slave_idx;

                /* Override the return value only with an index inside [0, count). */
                if (idx >= 0 && idx < (int)count)
                        bpf_override_return(ctx, idx);
        }
        return 0;
}

>
> BTW, if we allow the usage of bpf_override_return() on the KLP patched
> function, we should allow the usage of BPF_MODIFY_RETURN on this
> case too, right?

It's a possibility, but I haven't tested that specifically yet.

-- 
Regards
Yafang

^ permalink raw reply related

* Re: [PATCH v2 0/2] Fix trace remotes read with an offline CPU
From: Marc Zyngier @ 2026-04-02 13:37 UTC (permalink / raw)
  To: rostedt, mhiramat, mathieu.desnoyers, linux-trace-kernel,
	Vincent Donnefort
  Cc: kernel-team, linux-kernel
In-Reply-To: <20260401045100.3394299-1-vdonnefort@google.com>

On Wed, 01 Apr 2026 05:50:58 +0100, Vincent Donnefort wrote:
> This small series is fixing non-consuming read of a trace remote when the
> trace_buffer is created after a CPU is offline.
> 
> It also extends hotplug testing coverage to include this test case.
> 
> I have based this series on top of kvmarm/next which contains the hypervisor
> tracing patches.
> 
> [...]

Applied to next, thanks!

[1/2] tracing: Non-consuming read for trace remotes with an offline CPU
      commit: ce47b798ed1e44a6ae2c2966cdf7cba6b428083e
[2/2] tracing: selftests: Extend hotplug testing for trace remotes
      commit: ec07906bdc52848bd7dc93d1d44e642dcdc7a15a

Cheers,

	M.
-- 
Without deviation from the norm, progress is not possible.



^ permalink raw reply

* Re: [PATCH v9 2/3] tracing: Remove the backup instance automatically after read
From: Steven Rostedt @ 2026-04-02 14:52 UTC (permalink / raw)
  To: Masami Hiramatsu (Google)
  Cc: Mathieu Desnoyers, linux-kernel, linux-trace-kernel
In-Reply-To: <20260402221943.e0ba663a6a223f7f857adaf1@kernel.org>

On Thu, 2 Apr 2026 22:19:43 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> > 
> > rmdir() doesn't use __trace_array_get(), it uses trace_array_find() which
> > we shouldn't need to modify.
> > 


> Oops, OK it must be updated too.

No, it doesn't. Use trace_array_destroy() (as mentioned below) and all will
be fine.

-- Steve

> > > > 
> > > > What would prevent this is this is to use trace_array_destroy() that checks
> > > > this and also adds the proper locking:
> > > > 
> > > > static void trace_array_autoremove(struct work_struct *work)
> > > > {
> > > > 	struct trace_array *tr = container_of(work, struct trace_array, autoremove_work);
> > > > 
> > > > 	trace_array_destroy(tr);
> > > > }    
> > > 
> > > OK, let's use it.  
> > 
> > Yes, by using trace_array_destroy(), it will fix this.
> > 

^ permalink raw reply

* Re: [GIT PULL] RTLA changes for v7.1
From: Tomas Glozar @ 2026-04-02 15:08 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Costa Shulyupin, Wander Lairson Costa, LKML, linux-trace-kernel
In-Reply-To: <20260329122202.65a8b575@robin>

ne 29. 3. 2026 v 18:22 odesílatel Steven Rostedt <rostedt@goodmis.org> napsal:
>
> That should probably be fixed on top of v7.0-rcX so that it is not
> broken in 7.0.
>
> -- Steve
>

After merging the fix for 7.0 [1], there's now a context difference
caused by commit ea06305ff9920 (tools/rtla: Remove unneeded nr_cpus
arguments) on merging rtla-v7.1 onto the current master. The context
difference merges cleanly via three-way merge:

$ git merge rtla-v7.1
Auto-merging tools/tracing/rtla/src/timerlat_bpf.h
Merge made by the 'ort' strategy.
...

Would you prefer that I rebase this PR on top of 7.0-rc6 once it's tagged,
or leave the pull request as is and perhaps have you add a note to your PR
to Linus that the merge difference is expected?

[1] https://lore.kernel.org/all/177490453553.1933951.12021005257041359513.pr-tracker-bot@kernel.org/

Tomas

^ permalink raw reply

* Re: NULL pointer dereference when booting ppc64_guest_defconfig in QEMU on -next
From: Mathieu Desnoyers @ 2026-04-02 15:30 UTC (permalink / raw)
  To: Andrew Morton, Ritesh Harjani (IBM)
  Cc: Harry Yoo (Oracle), linuxppc-dev, Harry Yoo, Nathan Chancellor,
	Thomas Weißschuh, Michal Clapinski, Thomas Gleixner,
	Steven Rostedt, Masami Hiramatsu, linux-mm, linux-trace-kernel,
	linux-kernel, Srikar Dronamraju, Madhavan Srinivasan
In-Reply-To: <20260320192153.759d6fec57f04fb653a0dac7@linux-foundation.org>

On 2026-03-20 22:21, Andrew Morton wrote:
> On Sat, 21 Mar 2026 06:42:41 +0530 Ritesh Harjani (IBM) <ritesh.list@gmail.com> wrote:
> 
>> Looks like this is causing regressions in linux-next with warnings
>> similar to what Harry also pointed out. Do we have any solution for
>> this, or are we planning to hold on to this patch[1] and maybe even
>> remove it temporarily from linux-next, until this is fixed?
> 
> Yes, I'll disable this patchset.

Hi Andrew,

I have prepared fixes for this issue. On which branch should I rebase
them ? Do you still have the HPCC series in your branch or should I
send it anew ?

Thanks,

Mathieu

-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com

^ permalink raw reply

* Re: [PATCH v6 4/4] selftests/ftrace: Add accept cases for fprobe list syntax
From: Ryan Chung @ 2026-04-02 15:45 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: rostedt, corbet, shuah, mathieu.desnoyers, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <20260324131204.735c60133288e94718f20d31@kernel.org>

Hi Masami,

Thank you for your feedback. Unfortunately, I am not in a position
to continue working on this patch series for the foreseeable future.
If you or anyone else on the list would like to pick it up and carry
it forward, you are welcome to do so. I appreciate your time and
effort on this.

Best regards,
Seokwoo Chung

On Tue, 24 Mar 2026 at 00:12, Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Thu,  5 Feb 2026 08:58:42 -0500
> "Seokwoo Chung (Ryan)" <seokwoo.chung130@gmail.com> wrote:
>
> > Add fprobe_list.tc to test the comma-separated symbol list syntax
> > with :entry/:exit suffixes.  Three scenarios are covered:
> >
> >   1. List with default (entry) behavior and ! exclusion
> >   2. List with explicit :entry suffix
> >   3. List with :exit suffix for return probes
>
>
> Could you also add wildcard pattern test?
>
> >
> > Each test verifies that the correct functions appear in
> > enabled_functions and that excluded (!) symbols are absent.
> >
> > Note: The existing tests add_remove_fprobe.tc, fprobe_syntax_errors.tc,
> > and add_remove_fprobe_repeat.tc check their "requires" line against the
> > tracefs README for the old "%return" syntax pattern.  Since the README
> > now documents ":entry|:exit" instead, these tests report UNSUPPORTED.
> > Their "requires" lines need updating in a follow-up patch.
>
> This means you'll break the selftest. please fix those test first.
> (This fix must be done before "tracing/fprobe: Support comma-separated
> symbols and :entry/:exit" so that we can safely bisect it.)
>
> Thank you,
>
>
> >
> > Signed-off-by: Seokwoo Chung (Ryan) <seokwoo.chung130@gmail.com>
> > ---
> >  .../ftrace/test.d/dynevent/fprobe_list.tc     | 92 +++++++++++++++++++
> >  1 file changed, 92 insertions(+)
> >  create mode 100644 tools/testing/selftests/ftrace/test.d/dynevent/fprobe_list.tc
> >
> > diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_list.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_list.tc
> > new file mode 100644
> > index 000000000000..45e57c6f487d
> > --- /dev/null
> > +++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_list.tc
> > @@ -0,0 +1,92 @@
> > +#!/bin/sh
> > +# SPDX-License-Identifier: GPL-2.0
> > +# description: Fprobe event list syntax and :entry/:exit suffixes
> > +# requires: dynamic_events "f[:[<group>/][<event>]] <func-name>[:entry|:exit] [<args>]":README
> > +
> > +# Setup symbols to test. These are common kernel functions.
> > +PLACE=vfs_read
> > +PLACE2=vfs_write
> > +PLACE3=vfs_open
> > +
> > +echo 0 > events/enable
> > +echo > dynamic_events
> > +
> > +# Get baseline count of enabled functions (should be 0 if clean, but be safe)
> > +if [ -f enabled_functions ]; then
> > +     ocnt=`cat enabled_functions | wc -l`
> > +else
> > +     ocnt=0
> > +fi
> > +
> > +# Test 1: List default (entry) with exclusion
> > +# Target: Trace vfs_read and vfs_open, but EXCLUDE vfs_write
> > +echo "f:test/list_entry $PLACE,!$PLACE2,$PLACE3" >> dynamic_events
> > +grep -q "test/list_entry" dynamic_events
> > +test -d events/test/list_entry
> > +
> > +echo 1 > events/test/list_entry/enable
> > +
> > +grep -q "$PLACE" enabled_functions
> > +grep -q "$PLACE3" enabled_functions
> > +! grep -q "$PLACE2" enabled_functions
> > +
> > +# Check count (Baseline + 2 new functions)
> > +cnt=`cat enabled_functions | wc -l`
> > +if [ $cnt -ne $((ocnt + 2)) ]; then
> > +     exit_fail
> > +fi
> > +
> > +# Cleanup Test 1
> > +echo 0 > events/test/list_entry/enable
> > +echo "-:test/list_entry" >> dynamic_events
> > +! grep -q "test/list_entry" dynamic_events
> > +
> > +# Count should return to baseline
> > +cnt=`cat enabled_functions | wc -l`
> > +if [ $cnt -ne $ocnt ]; then
> > +     exit_fail
> > +fi
> > +
> > +# Test 2: List with explicit :entry suffix
> > +# (Should behave exactly like Test 1)
> > +echo "f:test/list_entry_exp $PLACE,!$PLACE2,$PLACE3:entry" >> dynamic_events
> > +grep -q "test/list_entry_exp" dynamic_events
> > +test -d events/test/list_entry_exp
> > +
> > +echo 1 > events/test/list_entry_exp/enable
> > +
> > +grep -q "$PLACE" enabled_functions
> > +grep -q "$PLACE3" enabled_functions
> > +! grep -q "$PLACE2" enabled_functions
> > +
> > +cnt=`cat enabled_functions | wc -l`
> > +if [ $cnt -ne $((ocnt + 2)) ]; then
> > +     exit_fail
> > +fi
> > +
> > +# Cleanup Test 2
> > +echo 0 > events/test/list_entry_exp/enable
> > +echo "-:test/list_entry_exp" >> dynamic_events
> > +
> > +# Test 3: List with :exit suffix
> > +echo "f:test/list_exit $PLACE,!$PLACE2,$PLACE3:exit" >> dynamic_events
> > +grep -q "test/list_exit" dynamic_events
> > +test -d events/test/list_exit
> > +
> > +echo 1 > events/test/list_exit/enable
> > +
> > +# Even for return probes, enabled_functions lists the attached symbols
> > +grep -q "$PLACE" enabled_functions
> > +grep -q "$PLACE3" enabled_functions
> > +! grep -q "$PLACE2" enabled_functions
> > +
> > +cnt=`cat enabled_functions | wc -l`
> > +if [ $cnt -ne $((ocnt + 2)) ]; then
> > +     exit_fail
> > +fi
> > +
> > +# Cleanup Test 3
> > +echo 0 > events/test/list_exit/enable
> > +echo "-:test/list_exit" >> dynamic_events
> > +
> > +clear_trace
> > --
> > 2.43.0
> >
>
>
> --
> Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply

* Re: [PATCH RFC v4 10/44] KVM: guest_memfd: Add support for KVM_SET_MEMORY_ATTRIBUTES2
From: Ackerley Tng @ 2026-04-02 16:20 UTC (permalink / raw)
  To: Michael Roth
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	ira.weiny, jmattson, jroedel, jthoughton, oupton, pankaj.gupta,
	qperret, rick.p.edgecombe, rientjes, shivankg, steven.price,
	tabba, willy, wyihan, yan.y.zhao, forkloop, pratyush,
	suzuki.poulose, aneesh.kumar, Paolo Bonzini, Sean Christopherson,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
	Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
	Kemeng Shi, Nhat Pham, Baoquan He, Barry Song, Axel Rasmussen,
	Yuanchu Xie, Wei Xu, Jason Gunthorpe, Vlastimil Babka, kvm,
	linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
	linux-mm
In-Reply-To: <CAEvNRgFkusZeKxGctUpTTbYjdi7nZL1ZZar-gT7XRUOCZ2xtpw@mail.gmail.com>

Ackerley Tng <ackerleytng@google.com> writes:

>
> [...snip...]
>

>> In the case of SNP, there is a
>> documentation/parameter check in snp_launch_update() that needs to be
>> relaxed in order for userspace to be able to pass in a NULL 'src'
>> parameter (since, for in-place conversion, it would be initialized in place
>> as shared memory prior to the call, since by the time kvm_gmem_poulate()
>> it will have been set to private and therefore cannot be faulted in via
>> GUP (and if it could, we'd be unecessarily copying the src back on top
>> of itself since src/dst are the same).
>
>
> [...snip...]
>
>
> Btw, if snp_launch_update() is going to accept a NULL src parameter and
> launch-update the src in-place:
>
> + Will userspace have to set that memory to private before calling launch
>   update?
>     + If yes, then would we need some other mode of conversion that is
>       not ZERO and not quite PRESERVE (since PRESERVE is defined as that
>       the guest will see what the host wrote post-encryption, but it
>       sounds like launch update is doing the encryption)
> + Or should launch update be called when that memory is shared? Will
>   launch update then also set that memory to private in guest_memfd?
>

Update after today's guest_memfd biweekly:

guest_memfd's populate will first check that the memory is shared, then
also set the memory to private after the populate.

KVM must not make assumptions about any memory that is private, so it
should actually only be operating on memory that is shared. This is
aligned with pre-in-place-conversion, since before this series, there
was no way to populate from private memory anyway.

>>
>> [...snip...]
>>

^ permalink raw reply

* Re: [GIT PULL] RTLA changes for v7.1
From: Steven Rostedt @ 2026-04-02 16:26 UTC (permalink / raw)
  To: Tomas Glozar
  Cc: Costa Shulyupin, Wander Lairson Costa, LKML, linux-trace-kernel
In-Reply-To: <CAP4=nvRZ2iEVxSgwHwumgonrhdvkYiG9KsCh0S2kwBwnemMi6A@mail.gmail.com>

On Thu, 2 Apr 2026 17:08:36 +0200
Tomas Glozar <tglozar@redhat.com> wrote:

> After merging the fix for 7.0 [1], there's now a context difference
> caused by commit ea06305ff9920 (tools/rtla: Remove unneeded nr_cpus
> arguments) on merging rtla-v7.1 onto the current master. The context
> difference merges cleanly via three-way merge:
> 
> $ git merge rtla-v7.1
> Auto-merging tools/tracing/rtla/src/timerlat_bpf.h
> Merge made by the 'ort' strategy.
> ...
> 
> Do you prefer me to rebase this PR on top of 7.0-rc6 once it's tagged
> or to leave the pull request as is and perhaps add a note to your PR
> to Linus the merge difference is expected?

This is fine as is. Linus is used to this. He's even OK with minor merge
conflicts. The only time you really need to tell Linus about something is
if the merge conflicts, or if a merge causes something to break even though
it merges cleanly (like removing an extra #endif).

-- Steve

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox