Linux Documentation
 help / color / mirror / Atom feed
* [RFC PATCH v2 12/14] selftests/kcov_dataflow: add rust_ffi_contract test module
From: Yunseong Kim @ 2026-06-11 16:21 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, K Prateek Nayak, Andrey Konovalov,
	Alexander Potapenko, Dmitry Vyukov, Andrew Morton, Miguel Ojeda,
	Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Nathan Chancellor, Nicolas Schier, Nick Desaulniers,
	Bill Wendling, Justin Stitt, Kees Cook, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Shuah Khan, Jonathan Corbet,
	Shuah Khan, Yunseong Kim
  Cc: linux-kernel, kasan-dev, rust-for-linux, linux-kbuild, llvm,
	linux-mm, linux-kselftest, workflows, linux-doc, Yeoreum Yun
In-Reply-To: <20260611-b4-kcov-dataflow-v2-v2-0-0a261da3987c@est.tech>

Demonstrates FFI contract violation detection. A C callee returns
success (0) but leaves buffer=NULL, violating the postcondition
"ret==0 implies buffer!=NULL". kcov_dataflow captures struct fields
at the boundary proving the violation without a crash or KASAN report.

Test:

  make LLVM=1 CC=clang \
    M=tools/testing/selftests/kcov_dataflow/rust_ffi_contract modules
  vng --user root --exec \
    "python3 tools/testing/selftests/kcov_dataflow/trigger-view.py \
      rust_ffi_contract -C 8 --ko \
      tools/testing/selftests/kcov_dataflow/rust_ffi_contract/rust_ffi_contract.ko"

Result:

  vfs_write(0x0)
  0x0 = full_proxy_write()
  full_proxy_write(0x0, 0x1, 0x0)
  0x8200080 = __debugfs_file_get()
  __debugfs_file_get(0x0)
  0x0 = __debugfs_file_get()
  0x0 = rust_ffi_trigger_write [rust_ffi_contract]()
  rust_ffi_trigger_write [rust_ffi_contract](0x0, 0x1, 0x0)
    ffi_alloc_buf [rust_ffi_contract](0xffffffff912288ad, 0x100, 0x0, 0x1)
    0x0 = ffi_alloc_buf [rust_ffi_contract]()
    _printk(0x6f635f6966663601)
      vprintk(0x6f635f6966663601, 0x8)
        vprintk_default(0x6f635f6966663601, 0x8)
          vprintk_emit(0x0, 0xffffffff, 0x0)
          0x0 = panic_on_this_cpu()
        0x0 = _prb_read_valid()
      0x0 = prb_read_valid()
    0x0 = console_unlock()
  0x3f = vprintk_emit()
  0x3f = vprintk_default()
  0x3f = vprintk()
  0x3f = _printk()
  ffi_check_result [rust_ffi_contract](0x0)
    _printk(0x6f635f6966663301)
      vprintk(0x6f635f6966663301, 0x8)
        vprintk_default(0x6f635f6966663301, 0x8)
          vprintk_emit(0x0, 0xffffffff, 0x0)
          0x0 = panic_on_this_cpu()
        0x0 = _prb_read_valid()
      0x0 = prb_read_valid()
    0x0 = console_unlock()
  0x3f = vprintk_emit()
  0x3f = vprintk_default()
  0x3f = vprintk()
  0x3f = _printk()
  0xfffffff2 = ffi_check_result [rust_ffi_contract]()
  0x1 = rust_ffi_trigger_write [rust_ffi_contract]()
  0x1 = full_proxy_write()
  0x1 = vfs_write()
  0x1 = ksys_write()
  0x1 = __x64_sys_write()
  0x0 = fpregs_assert_state_consistent()
  0xba5748 = __x64_sys_close()
  file_close_fd(0x4)
  0x0 = file_close_fd()

Cc: Alexander Potapenko <glider@google.com>
Assisted-by: Claude:claude-opus-4-6 [kiro-chat]
Link: https://github.com/yskzalloc/kcov-dataflow/actions
Signed-off-by: Yunseong Kim <yunseong.kim@est.tech>
---
 tools/testing/selftests/kcov_dataflow/Makefile     |   2 +-
 tools/testing/selftests/kcov_dataflow/README.rst   |   8 ++
 .../kcov_dataflow/run_rust_ffi_contract.sh         |  35 +++++++
 .../kcov_dataflow/rust_ffi_contract/Makefile       |   3 +
 .../rust_ffi_contract/rust_ffi_contract.c          | 111 +++++++++++++++++++++
 5 files changed, 158 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kcov_dataflow/Makefile b/tools/testing/selftests/kcov_dataflow/Makefile
index 3a42c54e954d..6412c90edfa1 100644
--- a/tools/testing/selftests/kcov_dataflow/Makefile
+++ b/tools/testing/selftests/kcov_dataflow/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 TEST_GEN_PROGS := user_ioctl/user_ioctl
-TEST_PROGS := run_eight_args_c.sh
+TEST_PROGS := run_eight_args_c.sh run_rust_ffi_contract.sh
 include ../lib.mk
diff --git a/tools/testing/selftests/kcov_dataflow/README.rst b/tools/testing/selftests/kcov_dataflow/README.rst
index 61a41f3bd596..06a0c805cc74 100644
--- a/tools/testing/selftests/kcov_dataflow/README.rst
+++ b/tools/testing/selftests/kcov_dataflow/README.rst
@@ -48,3 +48,11 @@ eight_args_rust/
 
         make LLVM=1 CC=clang M=tools/testing/selftests/kcov_dataflow/eight_args_rust modules
         python3 trigger-view.py eight_args_rust
+
+rust_ffi_contract/
+    Demonstrates FFI contract violation detection. A callee returns
+    success but leaves buffer=NULL. kcov_dataflow captures struct
+    fields proving the violation::
+
+        make LLVM=1 CC=clang M=tools/testing/selftests/kcov_dataflow/rust_ffi_contract modules
+        python3 trigger-view.py rust_ffi_contract
diff --git a/tools/testing/selftests/kcov_dataflow/run_rust_ffi_contract.sh b/tools/testing/selftests/kcov_dataflow/run_rust_ffi_contract.sh
new file mode 100755
index 000000000000..8662e532296b
--- /dev/null
+++ b/tools/testing/selftests/kcov_dataflow/run_rust_ffi_contract.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Test rust_ffi_contract module capture via kcov_dataflow
+DIR="$(dirname "$0")"
+KO="$DIR/rust_ffi_contract/rust_ffi_contract.ko"
+
+if [ ! -f "$KO" ]; then
+	echo "SKIP: $KO not found"
+	echo "Build: make LLVM=1 CC=clang M=...rust_ffi_contract modules"
+	exit 4  # kselftest SKIP
+fi
+
+if [ ! -e /sys/kernel/debug/kcov_dataflow ]; then
+	echo "SKIP: kcov_dataflow not available"
+	exit 4  # kselftest SKIP
+fi
+
+OUTPUT=$(python3 "$DIR/trigger-view.py" rust_ffi_contract --ko "$KO" --raw 2>&1)
+RC=$?  # exit status of trigger-view.py
+
+if [ $RC -ne 0 ]; then
+	echo "FAIL: trigger-and-view exited with $RC"
+	echo "$OUTPUT"
+	exit 1
+fi
+
+RECORDS=$(echo "$OUTPUT" | grep -c "^\[ENTRY\]\|^\[RET")  # count record lines
+if [ "$RECORDS" -gt 0 ]; then
+	echo "PASS: captured $RECORDS records from rust_ffi_contract"
+	exit 0
+else
+	echo "FAIL: no records captured"
+	echo "$OUTPUT"
+	exit 1
+fi
diff --git a/tools/testing/selftests/kcov_dataflow/rust_ffi_contract/Makefile b/tools/testing/selftests/kcov_dataflow/rust_ffi_contract/Makefile
new file mode 100644
index 000000000000..d2a0261070b1
--- /dev/null
+++ b/tools/testing/selftests/kcov_dataflow/rust_ffi_contract/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-m := rust_ffi_contract.o
+KCOV_DATAFLOW_rust_ffi_contract.o := y
diff --git a/tools/testing/selftests/kcov_dataflow/rust_ffi_contract/rust_ffi_contract.c b/tools/testing/selftests/kcov_dataflow/rust_ffi_contract/rust_ffi_contract.c
new file mode 100644
index 000000000000..9cbb17c42195
--- /dev/null
+++ b/tools/testing/selftests/kcov_dataflow/rust_ffi_contract/rust_ffi_contract.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rust_ffi_contract.c - Demonstrates kcov_dataflow detecting an FFI
+ * contract violation at a function boundary.
+ *
+ * The pattern: caller passes a struct pointer to callee. Callee's
+ * contract says "returns 0 implies out->buffer is valid". A bug in
+ * the async path returns 0 but leaves buffer=NULL.
+ *
+ * kcov_dataflow captures:
+ *   [ENTRY] ffi_alloc_buf(alloc={.buffer=NULL, .data_size=0}, 256, 16, 1)
+ *   [RET]   ffi_alloc_buf() = 0
+ *   [ENTRY] ffi_check_result(alloc={.buffer=NULL, ...})
+ *                             ^ proves contract violated
+ *
+ * Write to /sys/kernel/debug/kcov_dataflow_test/rust_ffi_trigger to run.
+ */
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("FFI contract violation detection via kcov_dataflow");
+
+struct ffi_alloc {
+	void *buffer;
+	u64 data_size;
+	u32 free_async;
+	u32 flags;
+};
+
+/* Prototypes */
+int ffi_alloc_buf(struct ffi_alloc *alloc, u64 data_size,
+		  u64 offsets_size, int is_async);
+int ffi_check_result(struct ffi_alloc *alloc);
+
+/*
+ * Callee with contract: returns 0 implies alloc->buffer is valid.
+ * BUG: async path with free_async==0 returns 0 but buffer stays NULL.
+ */
+noinline int ffi_alloc_buf(struct ffi_alloc *alloc, u64 data_size,
+			   u64 offsets_size, int is_async)
+{
+	/* Deliberate BUG: empty async pool reports success with buffer=NULL */
+	if (is_async && alloc->free_async == 0) {
+		alloc->buffer = NULL;
+		return 0; /* contract violation */
+	}
+
+	/* All other paths honour the contract, including on kmalloc failure */
+	alloc->buffer = kmalloc(data_size, GFP_KERNEL);
+	if (!alloc->buffer)
+		return -ENOMEM;
+	/* Consume an async slot only once the allocation has succeeded */
+	if (is_async)
+		alloc->free_async--;
+	return 0;
+}
+EXPORT_SYMBOL(ffi_alloc_buf);
+
+/* Consumer of the result: reports the violation if buffer is NULL, else frees it */
+noinline int ffi_check_result(struct ffi_alloc *alloc)
+{
+	if (!alloc->buffer) {
+		pr_err("ffi_contract: VIOLATION detected - buffer is NULL after success\n");
+		return -EFAULT;
+	}
+	kfree(alloc->buffer);
+	return 0;
+}
+EXPORT_SYMBOL(ffi_check_result);
+
+static struct dentry *test_dir;
+
+static ssize_t rust_ffi_trigger_write(struct file *f, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	struct ffi_alloc alloc = { .buffer = NULL, .data_size = 0,
+				   .free_async = 0, .flags = 0 };
+	int ret;
+
+	/* Trigger the bug: is_async=1, free_async=0 */
+	ret = ffi_alloc_buf(&alloc, 256, 16, 1);
+	pr_info("ffi_contract: ffi_alloc_buf returned %d, buffer=%p\n",
+		ret, alloc.buffer);
+
+	if (ret == 0)
+		ffi_check_result(&alloc);
+
+	return count;
+}
+
+static const struct file_operations rust_ffi_trigger_fops = {
+	.write = rust_ffi_trigger_write,
+};
+
+static int __init ffi_contract_init(void)
+{
+	test_dir = debugfs_create_dir("kcov_dataflow_test", NULL);
+	debugfs_create_file("rust_ffi_trigger", 0200, test_dir, NULL,
+			    &rust_ffi_trigger_fops);
+	return 0;
+}
+
+static void __exit ffi_contract_exit(void)
+{
+	debugfs_remove_recursive(test_dir);
+}
+
+module_init(ffi_contract_init);
+module_exit(ffi_contract_exit);

-- 
2.43.0


^ permalink raw reply related

* [RFC PATCH v2 11/14] selftests/kcov_dataflow: add eight_args_rust test module
From: Yunseong Kim @ 2026-06-11 16:21 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, K Prateek Nayak, Andrey Konovalov,
	Alexander Potapenko, Dmitry Vyukov, Andrew Morton, Miguel Ojeda,
	Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Nathan Chancellor, Nicolas Schier, Nick Desaulniers,
	Bill Wendling, Justin Stitt, Kees Cook, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Shuah Khan, Jonathan Corbet,
	Shuah Khan, Yunseong Kim
  Cc: linux-kernel, kasan-dev, rust-for-linux, linux-kbuild, llvm,
	linux-mm, linux-kselftest, workflows, linux-doc, Yeoreum Yun
In-Reply-To: <20260611-b4-kcov-dataflow-v2-v2-0-0a261da3987c@est.tech>

Rust module exercising 1-8 argument functions plus struct pointer.
Verifies register-passed (1-6) and stack-passed (7-8) arguments.

Test:

  make LLVM=1 CC=clang RUSTC=$RUSTC RUST_LIB_SRC=$RUST_LIB_SRC \
    M=tools/testing/selftests/kcov_dataflow/eight_args_rust modules
  vng --user root --exec \
    "python3 tools/testing/selftests/kcov_dataflow/trigger-view.py \
      eight_args_rust -C 8 --ko \
      tools/testing/selftests/kcov_dataflow/eight_args_rust/eight_args_rust.ko"

Result:

  ksys_write(0x0, 0x1)
    fdget_pos(0x4)
    0xffff891481d2bc00 = fdget_pos()
  0x0 = vfs_write()
  vfs_write(0x0, 0x1, 0x0)
  0x0 = _RNvCs3p16QzTwthP_15eight_args_rust13write_handler [eight_args_rust]()
  _RNvCs3p16QzTwthP_15eight_args_rust13write_handler [eight_args_rust](0x0, 0x1, 0x0)
    rdf_func2 [eight_args_rust](0x11, 0x22)
    0x33 = rdf_func2 [eight_args_rust]()
    rdf_func3 [eight_args_rust](0x11, 0x22, 0x33)
    0x66 = rdf_func3 [eight_args_rust]()
    rdf_func4 [eight_args_rust](0x11, 0x22, 0x33, 0x44)
    0xaa = rdf_func4 [eight_args_rust]()
    rdf_func5 [eight_args_rust](0x11, 0x22, 0x33, 0x44, 0x55)
    0xff = rdf_func5 [eight_args_rust]()
    rdf_func6 [eight_args_rust](0x11, 0x22, 0x33, 0x44, 0x55, 0x66)
    0x165 = rdf_func6 [eight_args_rust]()
    rdf_func7 [eight_args_rust](0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77)
    0x1dc = rdf_func7 [eight_args_rust]()
    rdf_func8 [eight_args_rust](0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88)
    0x264 = rdf_func8 [eight_args_rust]()
    rdf_func_struct [eight_args_rust](0xaaaa)
    0x16665 = rdf_func_struct [eight_args_rust]()
  0x1 = _RNvCs3p16QzTwthP_15eight_args_rust13write_handler [eight_args_rust]()
  0x1 = vfs_write()
  0x1 = ksys_write()
  0x1 = __x64_sys_write()
  0x0 = fpregs_assert_state_consistent()
  0xba5748 = __x64_sys_close()
  file_close_fd(0x4)
  0x0 = file_close_fd()
  0x0 = filp_flush()

Cc: Alexander Potapenko <glider@google.com>
Assisted-by: Claude:claude-opus-4-6 [kiro-chat]
Link: https://github.com/yskzalloc/kcov-dataflow/actions
Signed-off-by: Yunseong Kim <yunseong.kim@est.tech>
---
 tools/testing/selftests/kcov_dataflow/README.rst   |   7 +
 .../kcov_dataflow/eight_args_rust/Makefile         |   3 +
 .../eight_args_rust/eight_args_rust.rs             | 143 +++++++++++++++++++++
 .../selftests/kcov_dataflow/run_eight_args_rust.sh |  35 +++++
 4 files changed, 188 insertions(+)

diff --git a/tools/testing/selftests/kcov_dataflow/README.rst b/tools/testing/selftests/kcov_dataflow/README.rst
index e93b4e573504..61a41f3bd596 100644
--- a/tools/testing/selftests/kcov_dataflow/README.rst
+++ b/tools/testing/selftests/kcov_dataflow/README.rst
@@ -41,3 +41,10 @@ eight_args_c/
 
         make LLVM=1 CC=clang M=tools/testing/selftests/kcov_dataflow/eight_args_c modules
         python3 trigger-view.py eight_args_c
+
+eight_args_rust/
+    Rust equivalent of eight_args_c. Captures arguments at -O2 where
+    drgn/vmcore cannot. Requires CONFIG_RUST::
+
+        make LLVM=1 CC=clang M=tools/testing/selftests/kcov_dataflow/eight_args_rust modules
+        python3 trigger-view.py eight_args_rust
diff --git a/tools/testing/selftests/kcov_dataflow/eight_args_rust/Makefile b/tools/testing/selftests/kcov_dataflow/eight_args_rust/Makefile
new file mode 100644
index 000000000000..c1e9ea2c5622
--- /dev/null
+++ b/tools/testing/selftests/kcov_dataflow/eight_args_rust/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-m := eight_args_rust.o
+KCOV_DATAFLOW_eight_args_rust.o := y
diff --git a/tools/testing/selftests/kcov_dataflow/eight_args_rust/eight_args_rust.rs b/tools/testing/selftests/kcov_dataflow/eight_args_rust/eight_args_rust.rs
new file mode 100644
index 000000000000..3026265cda97
--- /dev/null
+++ b/tools/testing/selftests/kcov_dataflow/eight_args_rust/eight_args_rust.rs
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+//! Verify kcov_dataflow captures 1-8 argument Rust functions at -O2.
+//!
+//! This is the Rust equivalent of eight_args_c. Since rustc elides DWARF
+//! variable locations at -O2, drgn/vmcore cannot observe these arguments.
+//! kcov_dataflow captures them via the post-compilation pipeline.
+//!
+//! Write to /sys/kernel/debug/trigger_rust to invoke (created in the debugfs root).
+
+#![allow(missing_docs)]
+
+use kernel::prelude::*;
+use kernel::c_str;
+
+module! {
+    type: EightArgsRust,
+    name: "eight_args_rust",
+    authors: ["kcov-dataflow"],
+    description: "1-8 arg Rust verification for kcov_dataflow",
+    license: "GPL",
+}
+
+#[repr(C)]
+pub struct Pair {
+    pub x: u32,
+    pub y: u32,
+}
+
+#[no_mangle]
+#[inline(never)]
+pub extern "C" fn rdf_func1(a1: u64) -> u64 { a1 }
+
+#[no_mangle]
+#[inline(never)]
+pub extern "C" fn rdf_func2(a1: u64, a2: u64) -> u64 { a1 + a2 }
+
+#[no_mangle]
+#[inline(never)]
+pub extern "C" fn rdf_func3(a1: u64, a2: u64, a3: u64) -> u64 {
+    a1 + a2 + a3
+}
+
+#[no_mangle]
+#[inline(never)]
+pub extern "C" fn rdf_func4(a1: u64, a2: u64, a3: u64, a4: u64) -> u64 {
+    a1 + a2 + a3 + a4
+}
+
+#[no_mangle]
+#[inline(never)]
+pub extern "C" fn rdf_func5(a1: u64, a2: u64, a3: u64, a4: u64, a5: u64) -> u64 {
+    a1 + a2 + a3 + a4 + a5
+}
+
+#[no_mangle]
+#[inline(never)]
+pub extern "C" fn rdf_func6(
+    a1: u64, a2: u64, a3: u64, a4: u64, a5: u64, a6: u64,
+) -> u64 {
+    a1 + a2 + a3 + a4 + a5 + a6
+}
+
+#[no_mangle]
+#[inline(never)]
+pub extern "C" fn rdf_func7(
+    a1: u64, a2: u64, a3: u64, a4: u64, a5: u64, a6: u64, a7: u64,
+) -> u64 {
+    a1 + a2 + a3 + a4 + a5 + a6 + a7
+}
+
+#[no_mangle]
+#[inline(never)]
+pub extern "C" fn rdf_func8(
+    a1: u64, a2: u64, a3: u64, a4: u64, a5: u64, a6: u64, a7: u64, a8: u64,
+) -> u64 {
+    a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8
+}
+
+#[no_mangle]
+#[inline(never)]
+pub extern "C" fn rdf_func_struct(p: *const Pair) -> u64 {
+    unsafe { (*p).x as u64 + (*p).y as u64 }
+}
+
+unsafe extern "C" fn write_handler(
+    _file: *mut kernel::bindings::file,
+    _buf: *const core::ffi::c_char,
+    count: usize,
+    _ppos: *mut kernel::bindings::loff_t,
+) -> kernel::ffi::c_long {
+    let p = Pair { x: 0xAAAA, y: 0xBBBB };
+
+    let mut sum: u64 = 0;
+    sum = sum.wrapping_add(rdf_func1(0x11));
+    sum = sum.wrapping_add(rdf_func2(0x11, 0x22));
+    sum = sum.wrapping_add(rdf_func3(0x11, 0x22, 0x33));
+    sum = sum.wrapping_add(rdf_func4(0x11, 0x22, 0x33, 0x44));
+    sum = sum.wrapping_add(rdf_func5(0x11, 0x22, 0x33, 0x44, 0x55));
+    sum = sum.wrapping_add(rdf_func6(0x11, 0x22, 0x33, 0x44, 0x55, 0x66));
+    sum = sum.wrapping_add(rdf_func7(0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77));
+    sum = sum.wrapping_add(rdf_func8(0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88));
+    sum = sum.wrapping_add(rdf_func_struct(&p as *const Pair));
+    core::hint::black_box(sum);
+
+    count as kernel::ffi::c_long
+}
+
+#[repr(transparent)]
+struct SyncFops(kernel::bindings::file_operations);
+unsafe impl Sync for SyncFops {}
+
+static FOPS: SyncFops = SyncFops(kernel::bindings::file_operations {
+    write: Some(unsafe { core::mem::transmute(write_handler as *const ()) }),
+    ..unsafe { core::mem::zeroed() }
+});
+
+struct EightArgsRust {
+    d: *mut kernel::bindings::dentry,
+}
+
+impl kernel::Module for EightArgsRust {
+    fn init(_module: &'static ThisModule) -> Result<Self> {
+        let d = unsafe {
+            kernel::bindings::debugfs_create_file_unsafe(
+                c_str!("trigger_rust").as_char_ptr(),
+                0o222,
+                core::ptr::null_mut(),
+                core::ptr::null_mut(),
+                &FOPS.0,
+            )
+        };
+        Ok(Self { d })
+    }
+}
+
+impl Drop for EightArgsRust {
+    fn drop(&mut self) {
+        unsafe { kernel::bindings::debugfs_remove(self.d) };
+    }
+}
+
+unsafe impl Send for EightArgsRust {}
+unsafe impl Sync for EightArgsRust {}
diff --git a/tools/testing/selftests/kcov_dataflow/run_eight_args_rust.sh b/tools/testing/selftests/kcov_dataflow/run_eight_args_rust.sh
new file mode 100755
index 000000000000..c5f11866e19d
--- /dev/null
+++ b/tools/testing/selftests/kcov_dataflow/run_eight_args_rust.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Test eight_args_rust module capture via kcov_dataflow
+DIR="$(dirname "$0")"
+KO="$DIR/eight_args_rust/eight_args_rust.ko"
+
+if [ ! -f "$KO" ]; then
+	echo "SKIP: $KO not found"
+	echo "Build: make LLVM=1 CC=clang RUSTC=\$RUSTC M=...eight_args_rust modules"
+	exit 4  # kselftest SKIP
+fi
+
+if [ ! -e /sys/kernel/debug/kcov_dataflow ]; then
+	echo "SKIP: kcov_dataflow not available"
+	exit 4  # kselftest SKIP
+fi
+
+OUTPUT=$(python3 "$DIR/trigger-view.py" eight_args_rust --ko "$KO" --raw 2>&1)
+RC=$?  # exit status of trigger-view.py
+
+if [ $RC -ne 0 ]; then
+	echo "FAIL: trigger-and-view exited with $RC"
+	echo "$OUTPUT"
+	exit 1
+fi
+
+RECORDS=$(echo "$OUTPUT" | grep -c "^\[ENTRY\]\|^\[RET")  # count record lines
+if [ "$RECORDS" -gt 0 ]; then
+	echo "PASS: captured $RECORDS records from eight_args_rust"
+	exit 0
+else
+	echo "FAIL: no records captured"
+	echo "$OUTPUT"
+	exit 1
+fi

-- 
2.43.0


^ permalink raw reply related

* [RFC PATCH v2 10/14] selftests/kcov_dataflow: add eight_args_c test module
From: Yunseong Kim @ 2026-06-11 16:21 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, K Prateek Nayak, Andrey Konovalov,
	Alexander Potapenko, Dmitry Vyukov, Andrew Morton, Miguel Ojeda,
	Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Nathan Chancellor, Nicolas Schier, Nick Desaulniers,
	Bill Wendling, Justin Stitt, Kees Cook, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Shuah Khan, Jonathan Corbet,
	Shuah Khan, Yunseong Kim
  Cc: linux-kernel, kasan-dev, rust-for-linux, linux-kbuild, llvm,
	linux-mm, linux-kselftest, workflows, linux-doc, Yeoreum Yun
In-Reply-To: <20260611-b4-kcov-dataflow-v2-v2-0-0a261da3987c@est.tech>

C module exercising 1-8 argument functions plus struct pointer.
Verifies register-passed (1-6) and stack-passed (7-8) arguments.

Test:

  make LLVM=1 CC=clang \
    M=tools/testing/selftests/kcov_dataflow/eight_args_c modules
  vng --user root --exec \
    "python3 tools/testing/selftests/kcov_dataflow/trigger-view.py \
      eight_args_c -C 8 --ko \
      tools/testing/selftests/kcov_dataflow/eight_args_c/eight_args_c.ko"

Result:

  # Loaded eight_args_c
  # Captured 6195 words
  # 578 records
  # showing 65 records with context=8 around eight_args_c

  vfs_write(0x0)
  0x0 = full_proxy_write()
  full_proxy_write(0x0, 0x1, 0x0)
  0x8200080 = __debugfs_file_get()
  __debugfs_file_get(0x0)
  0x0 = __debugfs_file_get()
  0x0 = trigger_write [eight_args_c]()
  trigger_write [eight_args_c](0x0, 0x1, 0x0)
    df_func2 [eight_args_c](0x11, 0x22)
    0x33 = df_func2 [eight_args_c]()
    df_func3 [eight_args_c](0x11, 0x22, 0x33)
    0x66 = df_func3 [eight_args_c]()
    df_func4 [eight_args_c](0x11, 0x22, 0x33, 0x44)
    0xaa = df_func4 [eight_args_c]()
    df_func5 [eight_args_c](0x11, 0x22, 0x33, 0x44, 0x55)
    0xff = df_func5 [eight_args_c]()
    df_func6 [eight_args_c](0x11, 0x22, 0x33, 0x44, 0x55, 0x66)
    0x165 = df_func6 [eight_args_c]()
    df_func7 [eight_args_c](0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77)
    0x1dc = df_func7 [eight_args_c]()
    df_func8 [eight_args_c](0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88)
    0x264 = df_func8 [eight_args_c]()
    df_func_struct [eight_args_c](0xaaaa)
    0x16665 = df_func_struct [eight_args_c]()
  0x1 = trigger_write [eight_args_c]()
  0x1 = full_proxy_write()
  0x1 = vfs_write()
  0x1 = ksys_write()
  0x1 = __x64_sys_write()
  0x0 = fpregs_assert_state_consistent()
  0xba5748 = __x64_sys_close()
  file_close_fd(0x4)
  0x0 = file_close_fd()

Cc: Alexander Potapenko <glider@google.com>
Assisted-by: Claude:claude-opus-4-6 [kiro-chat]
Link: https://github.com/yskzalloc/kcov-dataflow/actions
Signed-off-by: Yunseong Kim <yunseong.kim@est.tech>
---
 tools/testing/selftests/kcov_dataflow/Makefile     |  1 +
 tools/testing/selftests/kcov_dataflow/README.rst   |  6 ++
 .../selftests/kcov_dataflow/eight_args_c/Makefile  |  3 +
 .../kcov_dataflow/eight_args_c/eight_args_c.c      | 95 ++++++++++++++++++++++
 .../selftests/kcov_dataflow/run_eight_args_c.sh    | 35 ++++++++
 5 files changed, 140 insertions(+)

diff --git a/tools/testing/selftests/kcov_dataflow/Makefile b/tools/testing/selftests/kcov_dataflow/Makefile
index b9fc1c5f0104..3a42c54e954d 100644
--- a/tools/testing/selftests/kcov_dataflow/Makefile
+++ b/tools/testing/selftests/kcov_dataflow/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 TEST_GEN_PROGS := user_ioctl/user_ioctl
+TEST_PROGS := run_eight_args_c.sh
 include ../lib.mk
diff --git a/tools/testing/selftests/kcov_dataflow/README.rst b/tools/testing/selftests/kcov_dataflow/README.rst
index 8b650a62acb1..e93b4e573504 100644
--- a/tools/testing/selftests/kcov_dataflow/README.rst
+++ b/tools/testing/selftests/kcov_dataflow/README.rst
@@ -35,3 +35,9 @@ trigger-view.py
 
         python3 trigger-view.py <module_name>
         python3 trigger-view.py <module_name> --raw
+
+eight_args_c/
+    C module with 1-8 argument functions + struct pointer::
+
+        make LLVM=1 CC=clang M=tools/testing/selftests/kcov_dataflow/eight_args_c modules
+        python3 trigger-view.py eight_args_c
diff --git a/tools/testing/selftests/kcov_dataflow/eight_args_c/Makefile b/tools/testing/selftests/kcov_dataflow/eight_args_c/Makefile
new file mode 100644
index 000000000000..aad45c7e3863
--- /dev/null
+++ b/tools/testing/selftests/kcov_dataflow/eight_args_c/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-m := eight_args_c.o
+KCOV_DATAFLOW_eight_args_c.o := y
diff --git a/tools/testing/selftests/kcov_dataflow/eight_args_c/eight_args_c.c b/tools/testing/selftests/kcov_dataflow/eight_args_c/eight_args_c.c
new file mode 100644
index 000000000000..09fbbbf8d14b
--- /dev/null
+++ b/tools/testing/selftests/kcov_dataflow/eight_args_c/eight_args_c.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * eight_args_c.c - Verify kcov_dataflow captures 1-8 argument functions.
+ *
+ * Write to /sys/kernel/debug/kcov_dataflow_test/trigger to invoke all
+ * eight functions and a struct-pointer function. Use with the
+ * kcov_dataflow selftest to verify correct capture of register-passed
+ * (1-6) and stack-passed (7-8) arguments on x86_64.
+ */
+#include <linux/module.h>
+#include <linux/debugfs.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("KCOV dataflow 8-argument stress test");
+
+struct pair {
+	u32 x;
+	u32 y;
+};
+
+/* Prototypes */
+u64 df_func1(u64 a1);
+u64 df_func2(u64 a1, u64 a2);
+u64 df_func3(u64 a1, u64 a2, u64 a3);
+u64 df_func4(u64 a1, u64 a2, u64 a3, u64 a4);
+u64 df_func5(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5);
+u64 df_func6(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6);
+u64 df_func7(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6, u64 a7);
+u64 df_func8(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6, u64 a7,
+	     u64 a8);
+u64 df_func_struct(struct pair *p);
+
+/* Implementations - noinline ensures trace callbacks are emitted */
+#define DEF_FUNC(name, ret_expr, ...)				\
+noinline u64 name(__VA_ARGS__) { return (ret_expr); }		\
+EXPORT_SYMBOL(name)
+
+DEF_FUNC(df_func1, a1, u64 a1);
+DEF_FUNC(df_func2, a1 + a2, u64 a1, u64 a2);
+DEF_FUNC(df_func3, a1 + a2 + a3, u64 a1, u64 a2, u64 a3);
+DEF_FUNC(df_func4, a1 + a2 + a3 + a4, u64 a1, u64 a2, u64 a3, u64 a4);
+DEF_FUNC(df_func5, a1 + a2 + a3 + a4 + a5,
+	 u64 a1, u64 a2, u64 a3, u64 a4, u64 a5);
+DEF_FUNC(df_func6, a1 + a2 + a3 + a4 + a5 + a6,
+	 u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6);
+DEF_FUNC(df_func7, a1 + a2 + a3 + a4 + a5 + a6 + a7,
+	 u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6, u64 a7);
+DEF_FUNC(df_func8, a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8,
+	 u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6, u64 a7, u64 a8);
+
+noinline u64 df_func_struct(struct pair *p)
+{
+	return (u64)p->x + (u64)p->y;
+}
+EXPORT_SYMBOL(df_func_struct);
+
+static struct dentry *test_dir;
+
+static ssize_t trigger_write(struct file *f, const char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	struct pair p = { .x = 0xAAAA, .y = 0xBBBB };
+	volatile u64 sum = 0;
+
+	sum += df_func1(0x11);
+	sum += df_func2(0x11, 0x22);
+	sum += df_func3(0x11, 0x22, 0x33);
+	sum += df_func4(0x11, 0x22, 0x33, 0x44);
+	sum += df_func5(0x11, 0x22, 0x33, 0x44, 0x55);
+	sum += df_func6(0x11, 0x22, 0x33, 0x44, 0x55, 0x66);
+	sum += df_func7(0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77);
+	sum += df_func8(0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88);
+	sum += df_func_struct(&p);
+
+	return count;
+}
+
+static const struct file_operations trigger_fops = {
+	.write = trigger_write,
+};
+
+static int __init eight_args_init(void)
+{
+	test_dir = debugfs_create_dir("kcov_dataflow_test", NULL);
+	debugfs_create_file("trigger", 0200, test_dir, NULL, &trigger_fops);
+	return 0;
+}
+
+static void __exit eight_args_exit(void)
+{
+	debugfs_remove_recursive(test_dir);
+}
+
+module_init(eight_args_init);
+module_exit(eight_args_exit);
diff --git a/tools/testing/selftests/kcov_dataflow/run_eight_args_c.sh b/tools/testing/selftests/kcov_dataflow/run_eight_args_c.sh
new file mode 100755
index 000000000000..d24092e920ff
--- /dev/null
+++ b/tools/testing/selftests/kcov_dataflow/run_eight_args_c.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Test eight_args_c module capture via kcov_dataflow
+DIR="$(dirname "$0")"
+KO="$DIR/eight_args_c/eight_args_c.ko"
+
+if [ ! -f "$KO" ]; then
+	echo "SKIP: $KO not found"
+	echo "Build: make LLVM=1 CC=clang M=...eight_args_c modules"
+	exit 4  # kselftest SKIP
+fi
+
+if [ ! -e /sys/kernel/debug/kcov_dataflow ]; then
+	echo "SKIP: kcov_dataflow not available"
+	exit 4
+fi
+
+OUTPUT=$(python3 "$DIR/trigger-view.py" eight_args_c --ko "$KO" --raw 2>&1)
+RC=$?
+
+if [ $RC -ne 0 ]; then
+	echo "FAIL: trigger-and-view exited with $RC"
+	echo "$OUTPUT"
+	exit 1
+fi
+
+RECORDS=$(echo "$OUTPUT" | grep -c "^\[ENTRY\]\|^\[RET")
+if [ "$RECORDS" -gt 0 ]; then
+	echo "PASS: captured $RECORDS records from eight_args_c"
+	exit 0
+else
+	echo "FAIL: no records captured"
+	echo "$OUTPUT"
+	exit 1
+fi

-- 
2.43.0


^ permalink raw reply related

* [RFC PATCH v2 09/14] selftests/kcov_dataflow: add ioctl interface selftest
From: Yunseong Kim @ 2026-06-11 16:21 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, K Prateek Nayak, Andrey Konovalov,
	Alexander Potapenko, Dmitry Vyukov, Andrew Morton, Miguel Ojeda,
	Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Nathan Chancellor, Nicolas Schier, Nick Desaulniers,
	Bill Wendling, Justin Stitt, Kees Cook, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Shuah Khan, Jonathan Corbet,
	Shuah Khan, Yunseong Kim
  Cc: linux-kernel, kasan-dev, rust-for-linux, linux-kbuild, llvm,
	linux-mm, linux-kselftest, workflows, linux-doc, Yeoreum Yun
In-Reply-To: <20260611-b4-kcov-dataflow-v2-v2-0-0a261da3987c@est.tech>

Add kselftest_harness-based test in user_ioctl/ covering the
kcov_dataflow ioctl interface (9 TAP cases): init, mmap, enable,
disable, error paths, double-enable rejection, and record capture.

Test:

  make -C tools/testing/selftests/kcov_dataflow
  ./user_ioctl/user_ioctl

Result:

  TAP version 13
  1..9
  # Starting 9 tests from 1 test cases.
  #  RUN           kcov_dataflow.init_track ...
  #            OK  kcov_dataflow.init_track
  ok 1 kcov_dataflow.init_track
  #  RUN           kcov_dataflow.init_track_too_small ...
  #            OK  kcov_dataflow.init_track_too_small
  ok 2 kcov_dataflow.init_track_too_small
  #  RUN           kcov_dataflow.init_track_double ...
  #            OK  kcov_dataflow.init_track_double
  ok 3 kcov_dataflow.init_track_double
  #  RUN           kcov_dataflow.mmap_before_init ...
  #            OK  kcov_dataflow.mmap_before_init
  ok 4 kcov_dataflow.mmap_before_init
  #  RUN           kcov_dataflow.enable_disable ...
  #            OK  kcov_dataflow.enable_disable
  ok 5 kcov_dataflow.enable_disable
  #  RUN           kcov_dataflow.enable_without_mmap ...
  #            OK  kcov_dataflow.enable_without_mmap
  ok 6 kcov_dataflow.enable_without_mmap
  #  RUN           kcov_dataflow.disable_without_enable ...
  #            OK  kcov_dataflow.disable_without_enable
  ok 7 kcov_dataflow.disable_without_enable
  #  RUN           kcov_dataflow.double_enable ...
  #            OK  kcov_dataflow.double_enable
  ok 8 kcov_dataflow.double_enable
  #  RUN           kcov_dataflow.records_captured ...
  #            OK  kcov_dataflow.records_captured

Cc: Alexander Potapenko <glider@google.com>
Assisted-by: Claude:claude-opus-4-6 [kiro-chat]
Link: https://github.com/yskzalloc/kcov-dataflow/actions
Signed-off-by: Yunseong Kim <yunseong.kim@est.tech>
---
 tools/testing/selftests/kcov_dataflow/.gitignore   |   8 ++
 tools/testing/selftests/kcov_dataflow/Makefile     |   3 +
 tools/testing/selftests/kcov_dataflow/README.rst   |  37 +++++
 .../kcov_dataflow/user_ioctl/user_ioctl.c          | 156 +++++++++++++++++++++
 4 files changed, 204 insertions(+)

diff --git a/tools/testing/selftests/kcov_dataflow/.gitignore b/tools/testing/selftests/kcov_dataflow/.gitignore
new file mode 100644
index 000000000000..f71fc89580f8
--- /dev/null
+++ b/tools/testing/selftests/kcov_dataflow/.gitignore
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+user_ioctl/user_ioctl
+*.o
+*.ko
+*.mod
+*.mod.c
+Module.symvers
+modules.order
diff --git a/tools/testing/selftests/kcov_dataflow/Makefile b/tools/testing/selftests/kcov_dataflow/Makefile
new file mode 100644
index 000000000000..b9fc1c5f0104
--- /dev/null
+++ b/tools/testing/selftests/kcov_dataflow/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := user_ioctl/user_ioctl
+include ../lib.mk
diff --git a/tools/testing/selftests/kcov_dataflow/README.rst b/tools/testing/selftests/kcov_dataflow/README.rst
new file mode 100644
index 000000000000..8b650a62acb1
--- /dev/null
+++ b/tools/testing/selftests/kcov_dataflow/README.rst
@@ -0,0 +1,37 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+KCOV-Dataflow Selftests
+========================
+
+This directory contains selftests for the KCOV-Dataflow subsystem
+(``/sys/kernel/debug/kcov_dataflow``).
+
+Prerequisites
+-------------
+
+Build the kernel with::
+
+    CONFIG_KCOV=y
+    CONFIG_KCOV_DATAFLOW_ARGS=y
+    CONFIG_KCOV_DATAFLOW_RET=y
+    CONFIG_DEBUG_INFO=y
+
+For full capture, also enable::
+
+    CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL=y
+
+Tests
+-----
+
+user_ioctl/user_ioctl.c
+    Automated ioctl interface test (9 TAP cases)::
+
+        make -C tools/testing/selftests/kcov_dataflow
+        ./user_ioctl/user_ioctl
+
+trigger-view.py
+    Loads a test module via finit_module(), writes to its debugfs trigger
+    with recording active, and prints captured records with symbol resolution::
+
+        python3 trigger-view.py <module_name>
+        python3 trigger-view.py <module_name> --raw
diff --git a/tools/testing/selftests/kcov_dataflow/user_ioctl/user_ioctl.c b/tools/testing/selftests/kcov_dataflow/user_ioctl/user_ioctl.c
new file mode 100644
index 000000000000..48448bc02d2f
--- /dev/null
+++ b/tools/testing/selftests/kcov_dataflow/user_ioctl/user_ioctl.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * user_ioctl.c - Selftest for /sys/kernel/debug/kcov_dataflow
+ *
+ * Verifies the ioctl interface: open, INIT_TRACK, mmap, ENABLE, DISABLE.
+ * With INSTRUMENT_ALL, also verifies that records are produced for
+ * syscalls executed while recording is active.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+
+#include "../../kselftest_harness.h"
+
+#define KCOV_DF_INIT_TRACK	_IOR('d', 1, unsigned long)
+#define KCOV_DF_ENABLE		_IO('d', 100)
+#define KCOV_DF_DISABLE		_IO('d', 101)
+
+#define BUF_SIZE 65536
+
+#define DF_TYPE_ENTRY	0xE
+#define DF_TYPE_RET	0xF
+
+FIXTURE(kcov_dataflow) {
+	int fd;		/* handle on /sys/kernel/debug/kcov_dataflow, opened in setup */
+	uint64_t *buf;	/* mmap()ed buffer; MAP_FAILED while nothing is mapped */
+};
+
+FIXTURE_SETUP(kcov_dataflow)	/* open the debugfs file; skip the test if open() fails */
+{
+	self->fd = open("/sys/kernel/debug/kcov_dataflow", O_RDWR);
+	if (self->fd < 0)
+		SKIP(return, "kcov_dataflow not available (need CONFIG_KCOV_DATAFLOW_ARGS)");
+	self->buf = MAP_FAILED;	/* nothing mapped yet; teardown checks this value */
+}
+
+FIXTURE_TEARDOWN(kcov_dataflow)	/* release whatever the test mapped and opened */
+{
+	if (self->buf != MAP_FAILED)	/* only unmap when a test stored a real mapping */
+		munmap(self->buf, BUF_SIZE * sizeof(uint64_t));
+	if (self->fd >= 0)
+		close(self->fd);
+}
+
+TEST_F(kcov_dataflow, init_track)	/* INIT_TRACK with BUF_SIZE must succeed */
+{
+	int ret = ioctl(self->fd, KCOV_DF_INIT_TRACK, (unsigned long)BUF_SIZE);
+
+	ASSERT_EQ(0, ret);
+}
+
+TEST_F(kcov_dataflow, init_track_too_small)	/* a size of 1 must be rejected */
+{
+	int ret = ioctl(self->fd, KCOV_DF_INIT_TRACK, 1UL);
+
+	ASSERT_EQ(-1, ret);
+	ASSERT_EQ(EINVAL, errno);	/* errno is still the one set by the ioctl above */
+}
+
+TEST_F(kcov_dataflow, init_track_double)	/* a second INIT_TRACK on one fd must fail */
+{
+	ASSERT_EQ(0, ioctl(self->fd, KCOV_DF_INIT_TRACK, (unsigned long)BUF_SIZE));
+	ASSERT_EQ(-1, ioctl(self->fd, KCOV_DF_INIT_TRACK, (unsigned long)BUF_SIZE));
+	ASSERT_EQ(EBUSY, errno);	/* the repeat is reported as EBUSY */
+}
+
+TEST_F(kcov_dataflow, mmap_before_init)	/* mmap() must fail before INIT_TRACK */
+{
+	self->buf = mmap(NULL, BUF_SIZE * sizeof(uint64_t),
+			 PROT_READ | PROT_WRITE, MAP_SHARED, self->fd, 0);
+	ASSERT_EQ(MAP_FAILED, self->buf);	/* also leaves buf in the "unmapped" state for teardown */
+}
+
+TEST_F(kcov_dataflow, enable_disable)	/* happy path: init, mmap, enable, disable */
+{
+	ASSERT_EQ(0, ioctl(self->fd, KCOV_DF_INIT_TRACK, (unsigned long)BUF_SIZE));
+	self->buf = mmap(NULL, BUF_SIZE * sizeof(uint64_t),
+			 PROT_READ | PROT_WRITE, MAP_SHARED, self->fd, 0);
+	ASSERT_NE(MAP_FAILED, self->buf);	/* teardown unmaps it */
+	ASSERT_EQ(0, ioctl(self->fd, KCOV_DF_ENABLE, 0));
+	ASSERT_EQ(0, ioctl(self->fd, KCOV_DF_DISABLE, 0));
+}
+
+TEST_F(kcov_dataflow, enable_without_mmap)	/* enable/disable must work with no mapping */
+{
+	ASSERT_EQ(0, ioctl(self->fd, KCOV_DF_INIT_TRACK, (unsigned long)BUF_SIZE));
+	/* ENABLE only needs INIT_TRACK to have run; userspace need not have mmap()ed */
+	ASSERT_EQ(0, ioctl(self->fd, KCOV_DF_ENABLE, 0));
+	ASSERT_EQ(0, ioctl(self->fd, KCOV_DF_DISABLE, 0));
+}
+
+TEST_F(kcov_dataflow, disable_without_enable)	/* DISABLE with nothing enabled must fail */
+{
+	ASSERT_EQ(0, ioctl(self->fd, KCOV_DF_INIT_TRACK, (unsigned long)BUF_SIZE));
+	ASSERT_EQ(-1, ioctl(self->fd, KCOV_DF_DISABLE, 0));
+	ASSERT_EQ(EINVAL, errno);	/* reported as EINVAL */
+}
+
+TEST_F(kcov_dataflow, double_enable)	/* one task cannot enable two fds at once */
+{
+	int fd2;
+
+	ASSERT_EQ(0, ioctl(self->fd, KCOV_DF_INIT_TRACK, (unsigned long)BUF_SIZE));
+	self->buf = mmap(NULL, BUF_SIZE * sizeof(uint64_t),
+			 PROT_READ | PROT_WRITE, MAP_SHARED, self->fd, 0);
+	ASSERT_NE(MAP_FAILED, self->buf);
+	ASSERT_EQ(0, ioctl(self->fd, KCOV_DF_ENABLE, 0));
+
+	/* A second, fully initialised fd must be refused while the first is enabled */
+	fd2 = open("/sys/kernel/debug/kcov_dataflow", O_RDWR);
+	ASSERT_GE(fd2, 0);
+	ASSERT_EQ(0, ioctl(fd2, KCOV_DF_INIT_TRACK, (unsigned long)BUF_SIZE));
+	ASSERT_EQ(-1, ioctl(fd2, KCOV_DF_ENABLE, 0));
+	ASSERT_EQ(EBUSY, errno);
+	close(fd2);	/* not reached if an ASSERT above fails */
+
+	ASSERT_EQ(0, ioctl(self->fd, KCOV_DF_DISABLE, 0));	/* the first fd is unaffected */
+}
+
+TEST_F(kcov_dataflow, records_captured)	/* check the first record written while enabled */
+{
+	uint64_t count;
+
+	ASSERT_EQ(0, ioctl(self->fd, KCOV_DF_INIT_TRACK, (unsigned long)BUF_SIZE));
+	self->buf = mmap(NULL, BUF_SIZE * sizeof(uint64_t),
+			 PROT_READ | PROT_WRITE, MAP_SHARED, self->fd, 0);
+	ASSERT_NE(MAP_FAILED, self->buf);
+	ASSERT_EQ(0, ioctl(self->fd, KCOV_DF_ENABLE, 0));
+
+	/* Make a syscall from this task while recording is enabled */
+	getpid();
+
+	ASSERT_EQ(0, ioctl(self->fd, KCOV_DF_DISABLE, 0));
+
+	count = self->buf[0];	/* word 0 is read as the count; word 1 as the first header */
+	/*
+	 * With INSTRUMENT_ALL, getpid() produces records.
+	 * Without it, count may be 0 (no instrumented code).
+	 * Either way, the interface works correctly.
+	 */
+	if (count > 0) {
+		uint64_t hdr = self->buf[1];
+		unsigned int type = (hdr >> 28) & 0xF;	/* record type: header bits 28..31 */
+
+		/* First record should be ENTRY or RET */
+		ASSERT_TRUE(type == DF_TYPE_ENTRY || type == DF_TYPE_RET);
+	}
+}
+
+TEST_HARNESS_MAIN

-- 
2.43.0


^ permalink raw reply related

* [RFC PATCH v2 08/14] selftests/kcov_dataflow: add trigger-view.py
From: Yunseong Kim @ 2026-06-11 16:21 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, K Prateek Nayak, Andrey Konovalov,
	Alexander Potapenko, Dmitry Vyukov, Andrew Morton, Miguel Ojeda,
	Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Nathan Chancellor, Nicolas Schier, Nick Desaulniers,
	Bill Wendling, Justin Stitt, Kees Cook, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Shuah Khan, Jonathan Corbet,
	Shuah Khan, Yunseong Kim
  Cc: linux-kernel, kasan-dev, rust-for-linux, linux-kbuild, llvm,
	linux-mm, linux-kselftest, workflows, linux-doc, Yeoreum Yun
In-Reply-To: <20260611-b4-kcov-dataflow-v2-v2-0-0a261da3987c@est.tech>

Add a Python script that loads a test module, triggers its debugfs
entry with kcov_dataflow recording active, then pretty-prints captured
records as a nested call tree with kallsyms symbol resolution.

Features:
- 8MB ring buffer (1M u64 words) for INSTRUMENT_ALL kernels
- Enable recording after module load, before trigger (avoids VFS noise)
- Variable-length record parsing using header-encoded field count
- Module-only filtering via kallsyms symbol lookup
- --context/-C N: show N records before/after each module function call
- --raw: print raw records instead of call tree
- Architecture-aware syscall numbers (x86_64 and arm64)

Usage:

  python3 trigger-view.py eight_args_c \
    --ko eight_args_c/eight_args_c.ko

  python3 trigger-view.py eight_args_rust \
    --ko eight_args_rust/eight_args_rust.ko

  python3 trigger-view.py rust_ffi_contract \
    --ko rust_ffi_contract/rust_ffi_contract.ko

Cc: Alexander Potapenko <glider@google.com>
Assisted-by: Claude:claude-opus-4-6 [kiro-chat]
Link: https://github.com/yskzalloc/kcov-dataflow/actions
Signed-off-by: Yunseong Kim <yunseong.kim@est.tech>
---
 .../selftests/kcov_dataflow/trigger-view.py        | 377 +++++++++++++++++++++
 1 file changed, 377 insertions(+)

diff --git a/tools/testing/selftests/kcov_dataflow/trigger-view.py b/tools/testing/selftests/kcov_dataflow/trigger-view.py
new file mode 100755
index 000000000000..a3274e472dc1
--- /dev/null
+++ b/tools/testing/selftests/kcov_dataflow/trigger-view.py
@@ -0,0 +1,377 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+"""
+trigger-view.py - Load a test module, trigger it with kcov_dataflow
+recording active, then pretty-print captured records.
+
+Usage:
+    python3 trigger-view.py eight_args_c
+    python3 trigger-view.py rust_ffi_contract
+    python3 trigger-view.py eight_args_c --raw
+
+The script:
+  1. Opens /sys/kernel/debug/kcov_dataflow
+  2. Inits and mmaps the buffer
+  3. Loads the module via finit_module()
+  4. Enables recording and writes to the module's debugfs trigger file
+  5. Disables recording
+  6. Reads /proc/kallsyms, then unloads the module
+  7. Parses and prints captured records with kallsyms resolution
+"""
+import os
+import sys
+import struct
+import ctypes
+import ctypes.util
+import argparse
+import fcntl
+
+# Constants
+DF_TYPE_ENTRY = 0xE
+DF_TYPE_RET = 0xF
+MAGIC_BAD = 0xBADADD85
+BUF_SIZE = 1048576  # 1M words = 8MB
+
+# Ioctl numbers
+def _IOR(t, nr, size):
+    return (2 << 30) | (ord(t) << 8) | nr | (size << 16)  # direction 2 at bit 30, size at bit 16, type char at bit 8, nr in the low bits
+
+def _IO(t, nr):
+    return (ord(t) << 8) | nr  # no direction or size bits: type char at bit 8, nr in the low bits
+
+KCOV_DF_INIT_TRACK = _IOR('d', 1, 8)
+KCOV_DF_ENABLE = _IO('d', 100)
+KCOV_DF_DISABLE = _IO('d', 101)
+
+# syscall numbers: arm64 when platform.machine() is "aarch64", x86_64 values otherwise
+import platform
+_machine = platform.machine()
+if _machine == "aarch64":
+    SYS_FINIT_MODULE = 273
+    SYS_DELETE_MODULE = 106
+else:  # x86_64
+    SYS_FINIT_MODULE = 313
+    SYS_DELETE_MODULE = 176
+
+SELFTEST_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def load_kallsyms():
+    """Return (addr, name, module) tuples from /proc/kallsyms sorted by address; [] if it cannot be opened."""
+    syms = []
+    try:
+        with open("/proc/kallsyms") as f:
+            for line in f:
+                parts = line.split()
+                if len(parts) >= 3:  # addr, type, name; lines with fewer columns are ignored
+                    addr = int(parts[0], 16)
+                    name = parts[2]
+                    mod = parts[3].strip("[]") if len(parts) > 3 else ""  # 4th column is "[module]" when present
+                    syms.append((addr, name, mod))
+    except (PermissionError, FileNotFoundError):
+        pass  # best effort: callers fall back to printing raw addresses
+    syms.sort()  # tuples order by address first, which symbolize() binary-searches on
+    return syms
+
+
+def symbolize(pc, syms):
+    """Return "name+0xoff [mod]" for the nearest symbol <= pc, or the hex pc if there is none."""
+    if not syms:
+        return f"0x{pc:x}"
+    lo, hi = 0, len(syms) - 1  # expects syms sorted by address, as load_kallsyms() returns them
+    while lo < hi:
+        mid = (lo + hi + 1) // 2  # round up so that lo = mid always shrinks the range
+        if syms[mid][0] <= pc:
+            lo = mid
+        else:
+            hi = mid - 1
+    addr, name, mod = syms[lo]
+    if addr > pc:  # pc lies below the first symbol
+        return f"0x{pc:x}"
+    offset = pc - addr
+    if mod:
+        return f"{name}+0x{offset:x} [{mod}]" if offset else f"{name} [{mod}]"
+    return f"{name}+0x{offset:x}" if offset else name  # "+0x0" is omitted for an exact hit
+
+
+def format_val(v):
+    """Format a captured value as hex text; MAGIC_BAD is shown as "FAULT"."""
+    if v == MAGIC_BAD:
+        return "FAULT"
+    if v == 0:
+        return "0x0"  # same text the general case below would produce
+    return f"0x{v:x}"
+
+
+def find_module(name):
+    """Return the path of a .ko under SELFTEST_DIR/<name>/, or None if none is found."""
+    ko_path = os.path.join(SELFTEST_DIR, name, f"{name}_mod.ko")  # first choice: <name>_mod.ko
+    if os.path.exists(ko_path):
+        return ko_path
+    # Try without _mod suffix
+    ko_path = os.path.join(SELFTEST_DIR, name, f"{name}.ko")
+    if os.path.exists(ko_path):
+        return ko_path
+    # Search for any .ko in the directory
+    mod_dir = os.path.join(SELFTEST_DIR, name)
+    if os.path.isdir(mod_dir):
+        for f in os.listdir(mod_dir):
+            if f.endswith(".ko"):
+                return os.path.join(mod_dir, f)  # first match in os.listdir() order
+    return None
+
+
+def finit_module(ko_path):
+    """Load the module file at ko_path via the finit_module syscall; raise OSError on failure."""
+    libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True)  # use_errno so get_errno() works below
+    fd = os.open(ko_path, os.O_RDONLY)
+    ret = libc.syscall(SYS_FINIT_MODULE, fd, b"", 0)  # empty parameter string, flags 0
+    os.close(fd)
+    if ret != 0:
+        errno = ctypes.get_errno()
+        raise OSError(errno, f"finit_module({ko_path}): {os.strerror(errno)}")
+
+
+def delete_module(name):
+    """Unload the module called name via the delete_module syscall; raise OSError on failure."""
+    libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True)  # use_errno so get_errno() works below
+    ret = libc.syscall(SYS_DELETE_MODULE, name.encode(), 0)  # flags 0
+    if ret != 0:
+        errno = ctypes.get_errno()
+        raise OSError(errno, f"delete_module({name}): {os.strerror(errno)}")
+
+
+def parse_records(buf, total_words):
+    """Parse the ring buffer into a list of records."""
+    records = []
+    pos = 1
+    while pos + 3 <= total_words and pos < BUF_SIZE:
+        hdr = buf[pos]
+
+        # Valid headers fit in 32 bits (upper 32 must be zero)
+        if hdr >> 32:
+            pos += 1
+            continue
+
+        rtype = (hdr >> 28) & 0xF
+
+        if rtype not in (DF_TYPE_ENTRY, DF_TYPE_RET):
+            pos += 1
+            continue
+
+        pc = buf[pos + 1]
+        meta = buf[pos + 2]
+        seq = hdr & 0x00FFFFFF
+        num_vals = (hdr >> 24) & 0xF
+        if num_vals == 0:
+            num_vals = 1
+
+        # Valid records always have a non-zero PC (kernel text address)
+        if pc == 0:
+            pos += 1
+            continue
+
+        val = buf[pos + 3] if pos + 3 < BUF_SIZE else 0
+        records.append({
+            "type": rtype,
+            "seq": seq,
+            "pc": pc,
+            "meta": meta,
+            "val": val,
+        })
+        pos += 3 + num_vals
+    return records
+
+
+def print_raw(records, syms):
+    """Print one line per record: type, seq, symbolized pc, arg index, size and value."""
+    for r in records:
+        sym = symbolize(r["pc"], syms)
+        t = "ENTRY" if r["type"] == DF_TYPE_ENTRY else "RET  "  # padded to the same width
+        arg_idx = (r["meta"] >> 56) & 0xFF  # top byte of meta
+        size = (r["meta"] >> 48) & 0xFF  # next byte of meta
+        print(f"[{t}] seq={r['seq']:3d} {sym} "
+              f"arg[{arg_idx}]({size}) = {format_val(r['val'])}")
+
+
+def print_tree(records, syms):
+    """Print records as an indented call tree: "f(args)" on entry, "val = f()" on return."""
+    depth = 0
+    # Group consecutive ENTRY records by PC to collect all args
+    i = 0
+    while i < len(records):
+        r = records[i]
+        sym = symbolize(r["pc"], syms)
+
+        if r["type"] == DF_TYPE_ENTRY:
+            # Collect all args for this call (same PC, consecutive entries)
+            args = []
+            pc = r["pc"]
+            while i < len(records) and records[i]["type"] == DF_TYPE_ENTRY \
+                    and records[i]["pc"] == pc:
+                args.append(format_val(records[i]["val"]))
+                i += 1
+            indent = "  " * depth
+            print(f"{indent}{sym}({', '.join(args)})")
+            depth += 1  # everything until the next RET is printed one level deeper
+        else:
+            depth = max(0, depth - 1)  # never go negative if a RET has no matching ENTRY
+            indent = "  " * depth
+            print(f"{indent}{format_val(r['val'])} = {sym}()")
+            i += 1
+
+
+def main():
+    """Load the module, record one write to its debugfs trigger, then print the captured records."""
+    parser = argparse.ArgumentParser(
+        description="Load a test module with kcov_dataflow and view records")
+    parser.add_argument("module", help="Test module name (e.g. eight_args_c)")
+    parser.add_argument("--raw", action="store_true",
+                        help="Print raw records instead of tree")
+    parser.add_argument("--ko", help="Explicit path to .ko file")
+    parser.add_argument("--context", "-C", type=int, default=0,
+                        help="Show N lines before/after each module record")
+    args = parser.parse_args()
+
+    # Find module: an explicit --ko path wins over the search in find_module()
+    if args.ko:
+        ko_path = args.ko
+    else:
+        ko_path = find_module(args.module)
+    if not ko_path or not os.path.exists(ko_path):
+        print(f"Cannot find module for '{args.module}'", file=sys.stderr)
+        print(f"Build it first: make LLVM=1 CC=clang "
+              f"M=tools/testing/selftests/kcov_dataflow/{args.module} modules",
+              file=sys.stderr)
+        sys.exit(1)
+
+    # Best effort: ask the kernel to show real addresses in /proc/kallsyms
+    try:
+        with open("/proc/sys/kernel/kptr_restrict", "w") as f:
+            f.write("0")
+    except (PermissionError, FileNotFoundError):
+        pass
+
+    try:
+        df_fd = os.open("/sys/kernel/debug/kcov_dataflow", os.O_RDWR)
+    except OSError as e:
+        print(f"Cannot open kcov_dataflow: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    # Init + mmap (prot 0x3 = PROT_READ|PROT_WRITE, flags 0x01 = MAP_SHARED)
+    fcntl.ioctl(df_fd, KCOV_DF_INIT_TRACK, BUF_SIZE)
+    libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True)
+    libc.mmap.restype = ctypes.c_void_p
+    libc.mmap.argtypes = [
+        ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int,
+        ctypes.c_int, ctypes.c_int, ctypes.c_long
+    ]
+    buf_ptr = libc.mmap(None, BUF_SIZE * 8, 0x3, 0x01, df_fd, 0)  # 8 bytes per u64 word
+    if buf_ptr == ctypes.c_void_p(-1).value:  # MAP_FAILED is (void *)-1
+        print("mmap failed", file=sys.stderr)
+        sys.exit(1)
+    buf = (ctypes.c_uint64 * BUF_SIZE).from_address(buf_ptr)  # view the mapping as an array of u64
+
+    # Load the module before enabling, so loader activity is not recorded
+    mod_name = os.path.basename(ko_path).replace(".ko", "")  # NOTE(review): assumes the .ko basename equals the loaded module name
+    try:
+        finit_module(ko_path)
+        print(f"# Loaded {mod_name}")
+    except OSError as e:
+        print(f"Failed to load module: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    # Get module .text address for PC filtering
+    mod_text_start = 0
+    try:
+        with open(f"/sys/module/{mod_name}/sections/.text") as f:
+            mod_text_start = int(f.read().strip(), 16)
+    except (FileNotFoundError, ValueError, PermissionError):
+        pass  # stays 0, which disables the .text fallback in is_module_pc()
+
+    # Enable recording AFTER load, BEFORE trigger (avoids VFS/loader noise)
+    fcntl.ioctl(df_fd, KCOV_DF_ENABLE, 0)
+    buf[0] = 0  # reset the word that is read back as the captured total below
+
+    # Trigger the module's debugfs file to invoke test functions
+    trigger_paths = [
+        f"/sys/kernel/debug/kcov_dataflow_test/trigger",
+        f"/sys/kernel/debug/kcov_dataflow_test/rust_ffi_trigger",
+        f"/sys/kernel/debug/trigger_rust",
+        f"/sys/kernel/debug/{mod_name}/trigger",
+    ]
+    for tp in trigger_paths:
+        try:
+            with open(tp, "w") as f:
+                f.write("1")
+            break  # only the first path that can be opened is written
+        except (FileNotFoundError, PermissionError):
+            continue  # if no path works the loop ends silently and nothing is triggered
+
+    fcntl.ioctl(df_fd, KCOV_DF_DISABLE, 0)
+
+    # Read kallsyms while module is still loaded (symbols available)
+    syms = load_kallsyms()
+
+    # Unload (best effort: a failure here does not stop the report)
+    try:
+        delete_module(mod_name)
+    except OSError:
+        pass
+
+    # Parse and display
+    total = int(buf[0])
+    print(f"# Captured {total} words")
+    records = parse_records(buf, total)
+    print(f"# {len(records)} records")
+
+    # Filter to module records using kallsyms
+    # Collect the addresses of this module's symbols; used only as a "have any?" test
+    mod_syms = set()
+    for addr, name, mod in syms:
+        if mod == mod_name and addr != 0:  # addr 0 means kallsyms addresses are hidden
+            mod_syms.add(addr)
+
+    def is_module_pc(pc):
+        """Check if PC belongs to mod_name via kallsyms."""
+        if mod_syms:
+            # Binary search: find nearest symbol <= pc, check module
+            lo, hi = 0, len(syms) - 1
+            while lo < hi:
+                mid = (lo + hi + 1) // 2
+                if syms[mid][0] <= pc:
+                    lo = mid
+                else:
+                    hi = mid - 1
+            return syms[lo][2] == mod_name
+        # Fallback: if no module symbols (kptr_restrict), use .text start
+        return mod_text_start and pc >= mod_text_start  # no upper bound: any pc at or above .text start matches
+
+    if syms or mod_text_start:
+        if args.context > 0:
+            module_indices = set()  # indices of module records plus their neighbours
+            for i, r in enumerate(records):
+                if is_module_pc(r["pc"]):
+                    for j in range(max(0, i - args.context),
+                                   min(len(records), i + args.context + 1)):
+                        module_indices.add(j)
+            records = [records[i] for i in sorted(module_indices)]  # keep capture order
+            print(f"# showing {len(records)} records with context={args.context} "
+                  f"around {mod_name}\n")
+        else:
+            module_records = [r for r in records if is_module_pc(r["pc"])]
+            print(f"# {len(module_records)} from {mod_name}\n")
+            records = module_records
+    else:
+        print("")  # nothing to filter with: every record is shown
+
+    if args.raw:
+        print_raw(records, syms)
+    else:
+        print_tree(records, syms)
+
+    os.close(df_fd)
+
+
+if __name__ == "__main__":
+    main()

-- 
2.43.0


^ permalink raw reply related

* [RFC PATCH v2 07/14] kcov: exclude kcov_dataflow.o from sanitizer instrumentation
From: Yunseong Kim @ 2026-06-11 16:21 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, K Prateek Nayak, Andrey Konovalov,
	Alexander Potapenko, Dmitry Vyukov, Andrew Morton, Miguel Ojeda,
	Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Nathan Chancellor, Nicolas Schier, Nick Desaulniers,
	Bill Wendling, Justin Stitt, Kees Cook, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Shuah Khan, Jonathan Corbet,
	Shuah Khan, Yunseong Kim
  Cc: linux-kernel, kasan-dev, rust-for-linux, linux-kbuild, llvm,
	linux-mm, linux-kselftest, workflows, linux-doc, Yeoreum Yun
In-Reply-To: <20260611-b4-kcov-dataflow-v2-v2-0-0a261da3987c@est.tech>

Exclude kcov_dataflow.o from KCOV, KASAN, KCSAN, UBSAN, and KMSAN
instrumentation, matching the exclusions already applied to kcov.o.
Without this, sanitizers instrumenting the dataflow callbacks would
cause infinite recursion.

Signed-off-by: Yunseong Kim <yunseong.kim@est.tech>
---
 kernel/Makefile | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/kernel/Makefile b/kernel/Makefile
index b70e524c4074..307b7fd1e1f9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -44,6 +44,12 @@ KCSAN_SANITIZE_kcov.o := n
 UBSAN_SANITIZE_kcov.o := n
 KMSAN_SANITIZE_kcov.o := n
 
+KCOV_INSTRUMENT_kcov_dataflow.o := n
+KASAN_SANITIZE_kcov_dataflow.o := n
+KCSAN_SANITIZE_kcov_dataflow.o := n
+UBSAN_SANITIZE_kcov_dataflow.o := n
+KMSAN_SANITIZE_kcov_dataflow.o := n
+
 CONTEXT_ANALYSIS_kcov.o := y
 CFLAGS_kcov.o := $(call cc-option, -fno-conserve-stack) -fno-stack-protector
 

-- 
2.43.0


^ permalink raw reply related

* [RFC PATCH v2 06/14] kcov: clean up dataflow state on task exit
From: Yunseong Kim @ 2026-06-11 16:21 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, K Prateek Nayak, Andrey Konovalov,
	Alexander Potapenko, Dmitry Vyukov, Andrew Morton, Miguel Ojeda,
	Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Nathan Chancellor, Nicolas Schier, Nick Desaulniers,
	Bill Wendling, Justin Stitt, Kees Cook, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Shuah Khan, Jonathan Corbet,
	Shuah Khan, Yunseong Kim
  Cc: linux-kernel, kasan-dev, rust-for-linux, linux-kbuild, llvm,
	linux-mm, linux-kselftest, workflows, linux-doc, Yeoreum Yun,
	sashiko-bot
In-Reply-To: <20260611-b4-kcov-dataflow-v2-v2-0-0a261da3987c@est.tech>

If a task exits without calling KCOV_DF_DISABLE, the kcov_df_enabled
flag and area pointer remain set on the freed task_struct. If that
memory is reallocated, subsequent writes could corrupt arbitrary memory.

Add kcov_dataflow_task_exit() which clears the dataflow fields, called
from kernel/exit.c alongside kcov_task_exit(). This matches how
kcov_task_exit() cleans up the legacy kcov state.

Reported-by: sashiko-bot <sashiko-bot@kernel.org>
Closes: https://sashiko.dev/#/patchset/20260603-kcov-dataflow-next-20260603-v2-0-fee0939de2c4%40est.tech
Signed-off-by: Yunseong Kim <yunseong.kim@est.tech>
---
 include/linux/kcov.h   |  2 ++
 kernel/exit.c          |  1 +
 kernel/kcov_dataflow.c | 11 +++++++++++
 3 files changed, 14 insertions(+)

diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index e9822b02982b..07d7823e5d6f 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -30,8 +30,10 @@ void kcov_task_exit(struct task_struct *t);
 
 #if defined(CONFIG_KCOV_DATAFLOW_ARGS) || defined(CONFIG_KCOV_DATAFLOW_RET)
 void kcov_dataflow_task_init(struct task_struct *t);
+void kcov_dataflow_task_exit(struct task_struct *t);
 #else
 static inline void kcov_dataflow_task_init(struct task_struct *t) {}
+static inline void kcov_dataflow_task_exit(struct task_struct *t) {}
 #endif
 
 #define kcov_prepare_switch(t)			\
diff --git a/kernel/exit.c b/kernel/exit.c
index 1056422bc101..af2314500791 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -935,6 +935,7 @@ void __noreturn do_exit(long code)
 		kthread_do_exit(kthread, code);
 
 	kcov_task_exit(tsk);
+	kcov_dataflow_task_exit(tsk);
 	kmsan_task_exit(tsk);
 
 	synchronize_group_exit(tsk, code);
diff --git a/kernel/kcov_dataflow.c b/kernel/kcov_dataflow.c
index 7cfe2495275a..df037b7e90eb 100644
--- a/kernel/kcov_dataflow.c
+++ b/kernel/kcov_dataflow.c
@@ -196,6 +196,17 @@ void kcov_dataflow_task_init(struct task_struct *t)
 	t->kcov_df_enabled = false;
 }
 
+/* Called from do_exit() in kernel/exit.c: if @t is still recording, stop it and clear its buffer fields. */
+void kcov_dataflow_task_exit(struct task_struct *t)
+{
+	if (t->kcov_df_enabled) {
+		t->kcov_df_enabled = false;
+		barrier();	/* compiler barrier: the flag is cleared before the area pointer */
+		t->kcov_df_area = NULL;
+		t->kcov_df_size = 0;	/* kcov_df_seq is left as it was */
+	}
+}
+
 /* File operations for /sys/kernel/debug/kcov_dataflow */
 
 static int kcov_df_open(struct inode *inode, struct file *filep)

-- 
2.43.0


^ permalink raw reply related

* [RFC PATCH v2 05/14] kcov: clear dataflow fields on fork
From: Yunseong Kim @ 2026-06-11 16:21 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, K Prateek Nayak, Andrey Konovalov,
	Alexander Potapenko, Dmitry Vyukov, Andrew Morton, Miguel Ojeda,
	Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Nathan Chancellor, Nicolas Schier, Nick Desaulniers,
	Bill Wendling, Justin Stitt, Kees Cook, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Shuah Khan, Jonathan Corbet,
	Shuah Khan, Yunseong Kim
  Cc: linux-kernel, kasan-dev, rust-for-linux, linux-kbuild, llvm,
	linux-mm, linux-kselftest, workflows, linux-doc, Yeoreum Yun,
	sashiko-bot
In-Reply-To: <20260611-b4-kcov-dataflow-v2-v2-0-0a261da3987c@est.tech>

dup_task_struct() copies the parent task_struct byte-for-byte. Without
explicitly clearing the dataflow fields, a forked child inherits the
parent's kcov_df_enabled flag and buffer pointer, leading to two tasks
writing to the same buffer and a potential use-after-free if the parent
closes the trace file.

Add kcov_dataflow_task_init() in kernel/kcov_dataflow.c and call it from
kernel/fork.c alongside kcov_task_init(), matching how kcov_stop() clears
the legacy kcov fields during fork.

Reported-by: sashiko-bot <sashiko-bot@kernel.org>
Closes: https://sashiko.dev/#/patchset/20260603-kcov-dataflow-next-20260603-v2-0-fee0939de2c4%40est.tech
Signed-off-by: Yunseong Kim <yunseong.kim@est.tech>
---
 include/linux/kcov.h   |  6 ++++++
 kernel/fork.c          |  1 +
 kernel/kcov_dataflow.c | 10 ++++++++++
 3 files changed, 17 insertions(+)

diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index 895b761b2db1..e9822b02982b 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -28,6 +28,12 @@ enum kcov_mode {
 void kcov_task_init(struct task_struct *t);
 void kcov_task_exit(struct task_struct *t);
 
+#if defined(CONFIG_KCOV_DATAFLOW_ARGS) || defined(CONFIG_KCOV_DATAFLOW_RET)
+void kcov_dataflow_task_init(struct task_struct *t);
+#else
+static inline void kcov_dataflow_task_init(struct task_struct *t) {}
+#endif
+
 #define kcov_prepare_switch(t)			\
 do {						\
 	(t)->kcov_mode |= KCOV_IN_CTXSW;	\
diff --git a/kernel/fork.c b/kernel/fork.c
index 892a95214c54..a5741de07979 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -980,6 +980,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	tsk->worker_private = NULL;
 
 	kcov_task_init(tsk);
+	kcov_dataflow_task_init(tsk);
 	kmsan_task_create(tsk);
 	kmap_local_fork(tsk);
 
diff --git a/kernel/kcov_dataflow.c b/kernel/kcov_dataflow.c
index 27587b8ceeab..7cfe2495275a 100644
--- a/kernel/kcov_dataflow.c
+++ b/kernel/kcov_dataflow.c
@@ -32,6 +32,7 @@
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/refcount.h>
+#include <linux/kcov.h>
 
 #define KCOV_DF_TYPE_ENTRY	0xE0000000ULL
 #define KCOV_DF_TYPE_RET	0xF0000000ULL
@@ -186,6 +187,15 @@ __sanitizer_cov_trace_ret(u64 pc, u32 ret_size, void *ret_val,
 EXPORT_SYMBOL(__sanitizer_cov_trace_ret);
 #endif
 
+/* Called from dup_task_struct() in kernel/fork.c: reset the dataflow fields copied from the parent. */
+void kcov_dataflow_task_init(struct task_struct *t)
+{
+	t->kcov_df_area = NULL;	/* the child must not write into the parent's buffer */
+	t->kcov_df_size = 0;
+	t->kcov_df_seq = 0;	/* the child starts with a fresh sequence value */
+	t->kcov_df_enabled = false;
+}
+
 /* File operations for /sys/kernel/debug/kcov_dataflow */
 
 static int kcov_df_open(struct inode *inode, struct file *filep)

-- 
2.43.0


^ permalink raw reply related

* [RFC PATCH v2 04/14] kcov: reject enable on multiple dataflow fds simultaneously
From: Yunseong Kim @ 2026-06-11 16:21 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, K Prateek Nayak, Andrey Konovalov,
	Alexander Potapenko, Dmitry Vyukov, Andrew Morton, Miguel Ojeda,
	Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Nathan Chancellor, Nicolas Schier, Nick Desaulniers,
	Bill Wendling, Justin Stitt, Kees Cook, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Shuah Khan, Jonathan Corbet,
	Shuah Khan, Yunseong Kim
  Cc: linux-kernel, kasan-dev, rust-for-linux, linux-kbuild, llvm,
	linux-mm, linux-kselftest, workflows, linux-doc, Yeoreum Yun,
	sashiko-bot
In-Reply-To: <20260611-b4-kcov-dataflow-v2-v2-0-0a261da3987c@est.tech>

A task could enable tracing on multiple kcov_dataflow file descriptors,
corrupting the internal tracking state when one is subsequently closed.

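Check current->kcov_df_enabled before allowing KCOV_DF_ENABLE and
return -EBUSY if already active. This matches kcov's check of
t->kcov != NULL in the KCOV_ENABLE path. Note that the existing
!df->area and df->t checks share this branch, so they now return
-EBUSY instead of -EINVAL as well.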

Reported-by: sashiko-bot <sashiko-bot@kernel.org>
Closes: https://sashiko.dev/#/patchset/20260603-kcov-dataflow-next-20260603-v2-0-fee0939de2c4%40est.tech
Signed-off-by: Yunseong Kim <yunseong.kim@est.tech>
---
 kernel/kcov_dataflow.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/kcov_dataflow.c b/kernel/kcov_dataflow.c
index 5248293280d5..27587b8ceeab 100644
--- a/kernel/kcov_dataflow.c
+++ b/kernel/kcov_dataflow.c
@@ -287,8 +287,8 @@ static long kcov_df_ioctl(struct file *filep, unsigned int cmd, unsigned long ar
 		break;
 
 	case KCOV_DF_ENABLE:
-		if (!df->area || df->t) {
-			res = -EINVAL;
+		if (!df->area || df->t || current->kcov_df_enabled) {
+			res = -EBUSY;
 			break;
 		}
 		df->t = current;

-- 
2.43.0


^ permalink raw reply related

* [RFC PATCH v2 03/14] kcov: add barriers to recursion guard in kcov_df_write
From: Yunseong Kim @ 2026-06-11 16:21 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, K Prateek Nayak, Andrey Konovalov,
	Alexander Potapenko, Dmitry Vyukov, Andrew Morton, Miguel Ojeda,
	Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Nathan Chancellor, Nicolas Schier, Nick Desaulniers,
	Bill Wendling, Justin Stitt, Kees Cook, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Shuah Khan, Jonathan Corbet,
	Shuah Khan, Yunseong Kim
  Cc: linux-kernel, kasan-dev, rust-for-linux, linux-kbuild, llvm,
	linux-mm, linux-kselftest, workflows, linux-doc, Yeoreum Yun
In-Reply-To: <20260611-b4-kcov-dataflow-v2-v2-0-0a261da3987c@est.tech>

The recursion guard (bit-31 of kcov_df_seq) prevents reentry when
copy_from_kernel_nofault() or other called functions are instrumented
with INSTRUMENT_ALL. Without compiler barriers, the guard set/clear
can be reordered relative to the function body, making the protection
ineffective under optimization.

Add barrier() after setting the guard and before clearing it, ensuring
the compiler does not move instrumented operations outside the guarded
region.

Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Yunseong Kim <yunseong.kim@est.tech>
---
 kernel/kcov_dataflow.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/kcov_dataflow.c b/kernel/kcov_dataflow.c
index df7e8bf70bfa..5248293280d5 100644
--- a/kernel/kcov_dataflow.c
+++ b/kernel/kcov_dataflow.c
@@ -86,6 +86,7 @@ kcov_df_write(u64 type_marker, u64 pc, u64 meta, void *ptr,
 	if (t->kcov_df_seq & (1U << 31))
 		return;
 	t->kcov_df_seq |= (1U << 31);
+	barrier();
 
 	area = (u64 *)t->kcov_df_area;
 	if (!area)
@@ -147,6 +148,7 @@ kcov_df_write(u64 type_marker, u64 pc, u64 meta, void *ptr,
 		}
 	}
 out:
+	barrier();
 	t->kcov_df_seq &= ~(1U << 31);
 }
 

-- 
2.43.0


^ permalink raw reply related

* [RFC PATCH v2 02/14] kcov: fix INIT_TRACK race in kcov_dataflow
From: Yunseong Kim @ 2026-06-11 16:21 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, K Prateek Nayak, Andrey Konovalov,
	Alexander Potapenko, Dmitry Vyukov, Andrew Morton, Miguel Ojeda,
	Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Nathan Chancellor, Nicolas Schier, Nick Desaulniers,
	Bill Wendling, Justin Stitt, Kees Cook, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Shuah Khan, Jonathan Corbet,
	Shuah Khan, Yunseong Kim
  Cc: linux-kernel, kasan-dev, rust-for-linux, linux-kbuild, llvm,
	linux-mm, linux-kselftest, workflows, linux-doc, Yeoreum Yun,
	sashiko-bot
In-Reply-To: <20260611-b4-kcov-dataflow-v2-v2-0-0a261da3987c@est.tech>

Two threads calling KCOV_DF_INIT_TRACK concurrently could both observe
df->area == NULL, drop the lock to allocate, and then both assign their
allocation to df->area, leaking one buffer.

Fix by rechecking df->area after re-acquiring the lock. If another
thread won the race, free the allocation and return -EBUSY. This
matches the pattern used by KCOV_INIT_TRACE in kernel/kcov.c.

Reported-by: sashiko-bot <sashiko-bot@kernel.org>
Closes: https://sashiko.dev/#/patchset/20260603-kcov-dataflow-next-20260603-v2-0-fee0939de2c4%40est.tech
Signed-off-by: Yunseong Kim <yunseong.kim@est.tech>
---
 kernel/kcov_dataflow.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/kernel/kcov_dataflow.c b/kernel/kcov_dataflow.c
index 721f742cbfe5..df7e8bf70bfa 100644
--- a/kernel/kcov_dataflow.c
+++ b/kernel/kcov_dataflow.c
@@ -268,11 +268,20 @@ static long kcov_df_ioctl(struct file *filep, unsigned int cmd, unsigned long ar
 			break;
 		}
 		spin_unlock_irqrestore(&df->lock, flags);
-		df->area = vmalloc_user(size * sizeof(u64));
-		if (!df->area)
-			return -ENOMEM;
-		spin_lock_irqsave(&df->lock, flags);
-		df->size = size;
+		{
+			void *area = vmalloc_user(size * sizeof(u64));
+
+			if (!area)
+				return -ENOMEM;
+			spin_lock_irqsave(&df->lock, flags);
+			if (df->area) {
+				spin_unlock_irqrestore(&df->lock, flags);
+				vfree(area);
+				return -EBUSY;
+			}
+			df->area = area;
+			df->size = size;
+		}
 		break;
 
 	case KCOV_DF_ENABLE:

-- 
2.43.0


^ permalink raw reply related

* [RFC PATCH v2 01/14] kcov: add per-task dataflow tracking for function arguments/return values
From: Yunseong Kim @ 2026-06-11 16:21 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, K Prateek Nayak, Andrey Konovalov,
	Alexander Potapenko, Dmitry Vyukov, Andrew Morton, Miguel Ojeda,
	Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Nathan Chancellor, Nicolas Schier, Nick Desaulniers,
	Bill Wendling, Justin Stitt, Kees Cook, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Shuah Khan, Jonathan Corbet,
	Shuah Khan, Yunseong Kim
  Cc: linux-kernel, kasan-dev, rust-for-linux, linux-kbuild, llvm,
	linux-mm, linux-kselftest, workflows, linux-doc, Yeoreum Yun
In-Reply-To: <20260611-b4-kcov-dataflow-v2-v2-0-0a261da3987c@est.tech>

Add a new tracking mechanism that captures function arguments/return
values at instrumented function boundaries via two SanitizerCoverage
callbacks, which have been submitted as an LLVM RFC:

  __sanitizer_cov_trace_args
  __sanitizer_cov_trace_ret

This requires a custom LLVM/Clang build with the trace-args/ret passes:

LLVM RFC:

  https://discourse.llvm.org/t/rfc-sanitizercoverage-add-fsanitize-coverage-trace-args-trace-ret/91026

LLVM PR:

  https://github.com/llvm/llvm-project/pull/201410

Clone and build toolchain:

  git clone --recursive --depth 1 --shallow-submodules \
    --jobs `nproc` https://github.com/yskzalloc/kcov-dataflow.git
  cd kcov-dataflow

  cd llvm-project
  cmake -S llvm -B build -G Ninja \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_C_COMPILER=clang \
    -DCMAKE_CXX_COMPILER=clang++ \
    -DLLVM_ENABLE_LLD=ON \
    -DLLVM_ENABLE_PROJECTS="clang;lld" \
    -DLLVM_TARGETS_TO_BUILD="X86;AArch64"
  ninja -C build
  cd ..

Build and boot kernel (using virtme-ng; CONFIG_RUST=y is for the Rust
module kselftest):

  export PATH=$PWD/llvm-project/build/bin:$PATH
  cd linux
  vng --build \
    --configitem CONFIG_KCOV=y \
    --configitem CONFIG_KCOV_DATAFLOW_ARGS=y \
    --configitem CONFIG_KCOV_DATAFLOW_RET=y \
    --configitem CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL=y \
    --configitem CONFIG_DEBUG_INFO=y \
    --configitem CONFIG_RUST=y \
    LLVM=1 CC=clang

Core implementation in kernel/kcov_dataflow.c (separated from kcov.c
at Alexander's request):
  - Per-task lock-free ring buffer via debugfs kcov_dataflow device
  - READ_ONCE/WRITE_ONCE atomic pattern (tested on arm64)
  - copy_from_kernel_nofault() for safe struct field reads
  - in_task() guard rejects interrupt context
  - Bit-31 recursion guard prevents INSTRUMENT_ALL re-entry

Build system (scripts/Makefile.kcov, scripts/Makefile.lib):
  - CFLAGS_KCOV_DATAFLOW: -fsanitize-coverage=trace-args,trace-ret
  - RUSTFLAGS_KCOV_DATAFLOW: -Cllvm-args=-sanitizer-coverage-trace-args/ret
  - Per-file opt-in: KCOV_DATAFLOW_file.o := y
  - Respects KCOV_INSTRUMENT := n for noinstr exclusion
  - CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL for whole-kernel

Kconfig (lib/Kconfig.debug):
  - CONFIG_KCOV_DATAFLOW_ARGS / CONFIG_KCOV_DATAFLOW_RET
  - Depends on CONFIG_KCOV and CONFIG_DEBUG_INFO
  - CONFIG_KCOV_DATAFLOW_NO_INLINE (default n)
  - CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL

Also fix rust/kernel/str.rs unused import (flags::* -> flags::GFP_KERNEL)
which newer rustc (1.98-nightly) rejects as a hard error.

Rust support requires rustc built against the custom LLVM with
trace-args/ret passes compiled in:

  https://github.com/yskzalloc/rust

Link: https://github.com/yskzalloc/kcov-dataflow/
Cc: Alexander Potapenko <glider@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Nicolas Schier <nsc@kernel.org>
Signed-off-by: Yunseong Kim <yunseong.kim@est.tech>
---
 include/linux/sched.h  |  10 ++
 kernel/Makefile        |   3 +
 kernel/kcov.c          |   2 +
 kernel/kcov_dataflow.c | 324 +++++++++++++++++++++++++++++++++++++++++++++++++
 lib/Kconfig.debug      |  43 +++++++
 rust/kernel/str.rs     |   2 +-
 scripts/Makefile.kcov  |  12 ++
 scripts/Makefile.lib   |   9 ++
 8 files changed, 404 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 373bcc0598d1..4b8aa73b3b67 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1541,6 +1541,16 @@ struct task_struct {
 	/* KCOV sequence number: */
 	int				kcov_sequence;
 
+#if defined(CONFIG_KCOV_DATAFLOW_ARGS) || defined(CONFIG_KCOV_DATAFLOW_RET)
+	/* KCOV dataflow per-task sequence counter for TLV records: */
+	u32				kcov_df_seq;
+
+	/* KCOV dataflow: separate buffer for trace-args/trace-ret */
+	unsigned int			kcov_df_size;
+	void				*kcov_df_area;
+	bool				kcov_df_enabled;
+#endif
+
 	/* Collect coverage from softirq context: */
 	unsigned int			kcov_softirq;
 #endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 1e1a31673577..b70e524c4074 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -98,6 +98,9 @@ obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o audit_watch.o audit_fsnotify.o audit_tree.o
 obj-$(CONFIG_GCOV_KERNEL) += gcov/
 obj-$(CONFIG_KCOV) += kcov.o
+ifneq ($(CONFIG_KCOV_DATAFLOW_ARGS)$(CONFIG_KCOV_DATAFLOW_RET),)
+obj-y += kcov_dataflow.o
+endif
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o
 obj-$(CONFIG_KGDB) += debug/
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 1df373fb562b..0a859ee8334f 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -353,6 +353,8 @@ void notrace __sanitizer_cov_trace_switch(kcov_u64 val, void *arg)
 EXPORT_SYMBOL(__sanitizer_cov_trace_switch);
 #endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */
 
+
+
 static void kcov_start(struct task_struct *t, struct kcov *kcov,
 			unsigned int size, void *area, enum kcov_mode mode,
 			int sequence)
diff --git a/kernel/kcov_dataflow.c b/kernel/kcov_dataflow.c
new file mode 100644
index 000000000000..721f742cbfe5
--- /dev/null
+++ b/kernel/kcov_dataflow.c
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KCOV Dataflow: per-task function argument/return value capture.
+ *
+ * Exposes /sys/kernel/debug/kcov_dataflow, completely independent from
+ * /sys/kernel/debug/kcov. Own buffer, own ioctl, own mmap.
+ *
+ * TLV record layout (all u64):
+ *   area[0]: total u64 words written (counter)
+ *   [pos+0]: type_and_seq (0xE=entry, 0xF=return in upper 4 bits)
+ *   [pos+1]: PC
+ *   [pos+2]: meta (arg_idx | arg_size | ptr)
+ *   [pos+3..N]: field values read via copy_from_kernel_nofault()
+ */
+#define pr_fmt(fmt) "kcov_dataflow: " fmt
+
+#define DISABLE_BRANCH_PROFILING
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/preempt.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/refcount.h>
+
+#define KCOV_DF_TYPE_ENTRY	0xE0000000ULL
+#define KCOV_DF_TYPE_RET	0xF0000000ULL
+#define KCOV_DF_MAGIC_BAD	0xBADADD85ULL
+#define KCOV_DF_IS_ERR(p)	((unsigned long)(p) >= (unsigned long)-4095UL)
+
+/* Ioctl commands for /sys/kernel/debug/kcov_dataflow */
+#define KCOV_DF_INIT_TRACK	_IOR('d', 1, unsigned long)
+#define KCOV_DF_ENABLE		_IO('d', 100)
+#define KCOV_DF_DISABLE		_IO('d', 101)
+
+struct kcov_dataflow {
+	refcount_t	refcount;
+	spinlock_t	lock;
+	unsigned int	size;	/* in u64 words */
+	void		*area;
+	struct task_struct *t;
+};
+
+static void kcov_df_put(struct kcov_dataflow *df)
+{
+	if (refcount_dec_and_test(&df->refcount)) {
+		vfree(df->area);
+		kfree(df);
+	}
+}
+
+/*
+ * Core write function for dataflow records.
+ * Uses the same READ_ONCE/WRITE_ONCE pattern as write_comp_data() in kcov.c.
+ */
+static noinline notrace __no_sanitize_coverage void
+kcov_df_write(u64 type_marker, u64 pc, u64 meta, void *ptr,
+	      u64 *offsets, u32 num_fields)
+{
+	struct task_struct *t = current;
+	u64 *area;
+	unsigned long count, start_index, end_pos, max_pos;
+	u32 record_len, seq, i;
+
+	if (!t->kcov_df_enabled)
+		return;
+
+	if (!in_task())
+		return;
+
+	/*
+	 * Prevent recursion: functions called by this callback
+	 * (copy_from_kernel_nofault) may be instrumented. Use the
+	 * sequence counter's high bit as a per-task guard.
+	 */
+	if (t->kcov_df_seq & (1U << 31))
+		return;
+	t->kcov_df_seq |= (1U << 31);
+
+	area = (u64 *)t->kcov_df_area;
+	if (!area)
+		goto out;
+
+	max_pos = t->kcov_df_size * sizeof(u64);
+
+	/* Record: header(1) + pc(1) + meta(1) + fields or scalar(max 1) */
+	record_len = 3 + (num_fields > 0 ? num_fields : 1);
+
+	count = READ_ONCE(area[0]);
+
+	start_index = 1 + count;
+	end_pos = (start_index + record_len) * sizeof(u64);
+	if (unlikely(end_pos > max_pos))
+		goto out;
+
+	WRITE_ONCE(area[0], count + record_len);
+	barrier();
+
+	seq = ++t->kcov_df_seq;
+	area[start_index] = type_marker |
+			    ((u64)(record_len - 3) << 24) |
+			    (seq & 0x00FFFFFFULL);
+	area[start_index + 1] = pc;
+	area[start_index + 2] = meta;
+
+	if (num_fields == 0) {
+		u64 val = 0;
+		u32 sz = (meta >> 48) & 0xFF;
+
+		if (sz > sizeof(val))
+			sz = sizeof(val);
+		if (ptr && !KCOV_DF_IS_ERR(ptr))
+			copy_from_kernel_nofault(&val, ptr, sz);
+		area[start_index + 3] = val;
+	} else {
+		if (KCOV_DF_IS_ERR(ptr)) {
+			for (i = 0; i < num_fields; i++)
+				area[start_index + 3 + i] = KCOV_DF_MAGIC_BAD;
+			goto out;
+		}
+		for (i = 0; i < num_fields; i++) {
+			u64 off, sz, val = KCOV_DF_MAGIC_BAD;
+			void *fa;
+
+			if (copy_from_kernel_nofault(&off, &offsets[i * 2], sizeof(off)) ||
+			    copy_from_kernel_nofault(&sz, &offsets[i * 2 + 1], sizeof(sz))) {
+				area[start_index + 3 + i] = KCOV_DF_MAGIC_BAD;
+				continue;
+			}
+			fa = (void *)((unsigned long)ptr + off);
+			val = 0;
+			if (sz <= sizeof(val))
+				copy_from_kernel_nofault(&val, fa, sz);
+			else
+				copy_from_kernel_nofault(&val, fa, sizeof(val));
+			area[start_index + 3 + i] = val;
+		}
+	}
+out:
+	t->kcov_df_seq &= ~(1U << 31);
+}
+
+#ifdef CONFIG_KCOV_DATAFLOW_ARGS
+noinline void notrace __no_sanitize_coverage
+__sanitizer_cov_trace_args(u64 pc, u32 arg_idx, u32 arg_size, void *arg_ptr,
+			   u64 *offsets, u32 num_fields);
+
+noinline void notrace __no_sanitize_coverage
+__sanitizer_cov_trace_args(u64 pc, u32 arg_idx, u32 arg_size, void *arg_ptr,
+			   u64 *offsets, u32 num_fields)
+{
+	u64 meta = ((u64)arg_idx << 56) | ((u64)arg_size << 48) |
+		   ((u64)(unsigned long)arg_ptr & 0xFFFFFFFFFFFFULL);
+	kcov_df_write(KCOV_DF_TYPE_ENTRY, pc, meta, arg_ptr,
+		      offsets, num_fields);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_args);
+#endif
+
+#ifdef CONFIG_KCOV_DATAFLOW_RET
+noinline void notrace __no_sanitize_coverage
+__sanitizer_cov_trace_ret(u64 pc, u32 ret_size, void *ret_val,
+			  u64 *offsets, u32 num_fields);
+
+noinline void notrace __no_sanitize_coverage
+__sanitizer_cov_trace_ret(u64 pc, u32 ret_size, void *ret_val,
+			  u64 *offsets, u32 num_fields)
+{
+	u64 meta = ((u64)ret_size << 48) |
+		   ((u64)(unsigned long)ret_val & 0xFFFFFFFFFFFFULL);
+	kcov_df_write(KCOV_DF_TYPE_RET, pc, meta, ret_val,
+		      offsets, num_fields);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_ret);
+#endif
+
+/* File operations for /sys/kernel/debug/kcov_dataflow */
+
+static int kcov_df_open(struct inode *inode, struct file *filep)
+{
+	struct kcov_dataflow *df;
+
+	df = kzalloc(sizeof(*df), GFP_KERNEL);
+	if (!df)
+		return -ENOMEM;
+	spin_lock_init(&df->lock);
+	refcount_set(&df->refcount, 1);
+	filep->private_data = df;
+	return nonseekable_open(inode, filep);
+}
+
+static int kcov_df_close(struct inode *inode, struct file *filep)
+{
+	struct kcov_dataflow *df = filep->private_data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&df->lock, flags);
+	if (df->t == current) {
+		current->kcov_df_enabled = false;
+		current->kcov_df_area = NULL;
+		current->kcov_df_size = 0;
+		df->t = NULL;
+	}
+	spin_unlock_irqrestore(&df->lock, flags);
+	kcov_df_put(df);
+	return 0;
+}
+
+static int kcov_df_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+	struct kcov_dataflow *df = filep->private_data;
+	unsigned long size, off;
+	struct page *page;
+	unsigned long flags;
+	void *area;
+	int res = 0;
+
+	spin_lock_irqsave(&df->lock, flags);
+	size = df->size * sizeof(u64);
+	if (!df->area || vma->vm_pgoff != 0 ||
+	    vma->vm_end - vma->vm_start != size) {
+		res = -EINVAL;
+		goto out;
+	}
+	area = df->area;
+	spin_unlock_irqrestore(&df->lock, flags);
+
+	vm_flags_set(vma, VM_DONTEXPAND);
+	for (off = 0; off < size; off += PAGE_SIZE) {
+		page = vmalloc_to_page(area + off);
+		res = vm_insert_page(vma, vma->vm_start + off, page);
+		if (res)
+			return res;
+	}
+	return 0;
+out:
+	spin_unlock_irqrestore(&df->lock, flags);
+	return res;
+}
+
+static long kcov_df_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+	struct kcov_dataflow *df = filep->private_data;
+	unsigned long flags;
+	unsigned long size;
+	int res = 0;
+
+	spin_lock_irqsave(&df->lock, flags);
+	switch (cmd) {
+	case KCOV_DF_INIT_TRACK:
+		if (df->area) {
+			res = -EBUSY;
+			break;
+		}
+		size = arg;
+		if (size < 2 || size > (128 << 20) / sizeof(u64)) {
+			res = -EINVAL;
+			break;
+		}
+		spin_unlock_irqrestore(&df->lock, flags);
+		df->area = vmalloc_user(size * sizeof(u64));
+		if (!df->area)
+			return -ENOMEM;
+		spin_lock_irqsave(&df->lock, flags);
+		df->size = size;
+		break;
+
+	case KCOV_DF_ENABLE:
+		if (!df->area || df->t) {
+			res = -EINVAL;
+			break;
+		}
+		df->t = current;
+		current->kcov_df_area = df->area;
+		current->kcov_df_size = df->size;
+		current->kcov_df_seq = 0;
+		barrier();
+		current->kcov_df_enabled = true;
+		break;
+
+	case KCOV_DF_DISABLE:
+		if (df->t != current) {
+			res = -EINVAL;
+			break;
+		}
+		current->kcov_df_enabled = false;
+		barrier();
+		current->kcov_df_area = NULL;
+		current->kcov_df_size = 0;
+		df->t = NULL;
+		break;
+
+	default:
+		res = -ENOTTY;
+	}
+	spin_unlock_irqrestore(&df->lock, flags);
+	return res;
+}
+
+static const struct file_operations kcov_df_fops = {
+	.open		= kcov_df_open,
+	.unlocked_ioctl	= kcov_df_ioctl,
+	.compat_ioctl	= kcov_df_ioctl,
+	.mmap		= kcov_df_mmap,
+	.release	= kcov_df_close,
+};
+
+static int __init kcov_dataflow_init(void)
+{
+	debugfs_create_file_unsafe("kcov_dataflow", 0600, NULL, NULL,
+				   &kcov_df_fops);
+	return 0;
+}
+device_initcall(kcov_dataflow_init);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e2f976c3301b..a402f829f9f9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2261,6 +2261,49 @@ config KCOV_SELFTEST
 	  On test failure, causes the kernel to panic. Recommended to be
 	  enabled, ensuring critical functionality works as intended.
 
+config KCOV_DATAFLOW_ARGS
+	bool "Enable KCOV dataflow: function argument capture"
+	depends on KCOV
+	depends on DEBUG_INFO
+	depends on $(cc-option,-fsanitize-coverage=trace-args)
+	help
+	  Captures function arguments at entry via /sys/kernel/debug/kcov_dataflow.
+	  Struct pointer arguments are auto-expanded using compiler DebugInfo
+	  metadata, recording individual field values at runtime.
+	  Enable per-module with: KCOV_DATAFLOW_file.o := y in the Makefile.
+	  Requires clang with -fsanitize-coverage=trace-args support.
+
+config KCOV_DATAFLOW_RET
+	bool "Enable KCOV dataflow: return value capture"
+	depends on KCOV
+	depends on DEBUG_INFO
+	depends on $(cc-option,-fsanitize-coverage=trace-ret)
+	help
+	  Captures function return values via /sys/kernel/debug/kcov_dataflow.
+	  Struct pointer returns are auto-expanded using compiler DebugInfo
+	  metadata, recording individual field values at runtime.
+	  Enable per-module with: KCOV_DATAFLOW_file.o := y in the Makefile.
+	  Requires clang with -fsanitize-coverage=trace-ret support.
+
+config KCOV_DATAFLOW_NO_INLINE
+	bool "Disable inlining for dataflow-instrumented files"
+	default n
+	depends on KCOV_DATAFLOW_ARGS || KCOV_DATAFLOW_RET
+	help
+	  Adds -fno-inline to files instrumented with KCOV_DATAFLOW.
+	  This ensures every function boundary is preserved, giving
+	  complete argument visibility. Disable for lower overhead at the
+	  cost of losing argument records for inlined functions.
+
+config KCOV_DATAFLOW_INSTRUMENT_ALL
+	bool "Instrument all kernel code with dataflow coverage"
+	depends on KCOV_DATAFLOW_ARGS || KCOV_DATAFLOW_RET
+	help
+	  Instrument all kernel objects with trace-args/trace-ret
+	  automatically. Individual files or directories can opt out
+	  with KCOV_DATAFLOW_file.o := n or KCOV_DATAFLOW := n.
+	  Warning: significantly increases code size and boot time.
+
 config DEBUG_AID_FOR_SYZBOT
        bool "Additional debug code for syzbot"
        default n
diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs
index a435674f05ea..f447a25c67c9 100644
--- a/rust/kernel/str.rs
+++ b/rust/kernel/str.rs
@@ -3,7 +3,7 @@
 //! String representations.
 
 use crate::{
-    alloc::{flags::*, AllocError, KVec},
+    alloc::{flags::GFP_KERNEL, AllocError, KVec},
     error::{to_result, Result},
     fmt::{self, Write},
     prelude::*,
diff --git a/scripts/Makefile.kcov b/scripts/Makefile.kcov
index 78305a84ba9d..a459c119795f 100644
--- a/scripts/Makefile.kcov
+++ b/scripts/Makefile.kcov
@@ -9,3 +9,15 @@ kcov-rflags-$(CONFIG_KCOV_ENABLE_COMPARISONS)	+= -Cllvm-args=-sanitizer-coverage
 
 export CFLAGS_KCOV := $(kcov-flags-y)
 export RUSTFLAGS_KCOV := $(kcov-rflags-y)
+
+# KCOV dataflow: trace function args and return values
+kcov-dataflow-flags-y := -fsanitize-coverage=trace-args,trace-ret
+kcov-dataflow-flags-$(CONFIG_KCOV_DATAFLOW_NO_INLINE) += -fno-inline
+
+# Rust: only add the trace-args/ret llvm-args (sancov-module pass and level=3
+# are already provided by RUSTFLAGS_KCOV since KCOV_DATAFLOW depends on KCOV).
+kcov-dataflow-rflags-y := -Cllvm-args=-sanitizer-coverage-trace-args
+kcov-dataflow-rflags-y += -Cllvm-args=-sanitizer-coverage-trace-ret
+
+export CFLAGS_KCOV_DATAFLOW := $(kcov-dataflow-flags-y)
+export RUSTFLAGS_KCOV_DATAFLOW := $(kcov-dataflow-rflags-y)
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 0a4fdd8bd975..b64fabb88ab9 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -88,6 +88,15 @@ _c_flags += $(if $(patsubst n%,, \
 _rust_flags += $(if $(patsubst n%,, \
 	$(KCOV_INSTRUMENT_$(target-stem).o)$(KCOV_INSTRUMENT)$(if $(is-kernel-object),$(CONFIG_KCOV_INSTRUMENT_ALL))), \
 	$(RUSTFLAGS_KCOV))
+# KCOV dataflow respects KCOV_INSTRUMENT := n (noinstr exclusion)
+_c_flags += $(if $(patsubst n%,, \
+	$(KCOV_INSTRUMENT_$(target-stem).o)$(KCOV_INSTRUMENT)$(if $(is-kernel-object),y)),$(if $(patsubst n%,, \
+	$(KCOV_DATAFLOW_$(target-stem).o)$(KCOV_DATAFLOW)$(if $(is-kernel-object),$(CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL))), \
+	$(CFLAGS_KCOV_DATAFLOW)))
+_rust_flags += $(if $(patsubst n%,, \
+	$(KCOV_INSTRUMENT_$(target-stem).o)$(KCOV_INSTRUMENT)$(if $(is-kernel-object),y)),$(if $(patsubst n%,, \
+	$(KCOV_DATAFLOW_$(target-stem).o)$(KCOV_DATAFLOW)$(if $(is-kernel-object),$(CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL))), \
+	$(RUSTFLAGS_KCOV_DATAFLOW)))
 endif
 
 #

-- 
2.43.0


^ permalink raw reply related

* [RFC PATCH v2 00/14] kcov: add per-task dataflow tracking for function arguments/return values
From: Yunseong Kim @ 2026-06-11 16:21 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, K Prateek Nayak, Andrey Konovalov,
	Alexander Potapenko, Dmitry Vyukov, Andrew Morton, Miguel Ojeda,
	Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Nathan Chancellor, Nicolas Schier, Nick Desaulniers,
	Bill Wendling, Justin Stitt, Kees Cook, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Shuah Khan, Jonathan Corbet,
	Shuah Khan, Yunseong Kim
  Cc: linux-kernel, kasan-dev, rust-for-linux, linux-kbuild, llvm,
	linux-mm, linux-kselftest, workflows, linux-doc, Yeoreum Yun,
	sashiko-bot

Introduce kcov_dataflow, a per-task dataflow tracking mechanism for function
arguments/return values at instrumented function boundaries.

Motivation
==========

First, coverage-guided kernel fuzzers use KCOV edge coverage as their
sole feedback signal. This cannot distinguish two executions of the same
function with different argument values. Fuzzers plateau on stateful
subsystems where security-critical behavior depends on runtime values
rather than control-flow topology.

Second, existing tracing tools address parts of this challenge:

 1. Per-Task Wide-Scale Tracing Contexts (ftrace / kprobes / eBPF)

 Breakpoint instruction and redirection: Hooks physically patch global kernel
 text. The kernel cannot selectively hook functions per task; every CPU core
 triggers the hook, deferring PID filtering to post-trigger logic.

 2. Rust for Linux Tracing Status

 rustc correctly emits -mfentry code stubs via its LLVM backend, enabling
 native integration with ftrace, function_graph, and eBPF trampolines
 (fentry/fexit). Metadata & Signature Analysis: funcgraph-args parses Rust
 via pahole BTF generation. However, idiomatic types like generics or slices
 are difficult to represent cleanly compared to standard repr(C) structs.

 3. Inline Function Tracing Limitations

 Tracing Visibility: Inlined code cannot be targeted via tracefs. Its runtime
 footprint is absorbed by the caller. Debugging requires explicit noinline (C)
 or #[inline(never)] (Rust) markers.

Approach
========

An LLVM SanitizerCoverage [1] pass inserts callbacks at function entry/exit
that record argument values into a per-task mmap'd ring buffer. Kernel
backend reads struct fields via copy_from_kernel_nofault(). When not enabled
for a task, the cost is a single boolean check.

The system captures:
- Function argument values at entry (with automatic struct field expansion)
- Return values at exit
- Per-task isolation (no interference between processes)
- Both C and Rust kernel modules
- Instrument even inline functions (default n)

For a C-based kernel module example, eight_args_c:

  vfs_write(0x0)
  0x0 = full_proxy_write()
  full_proxy_write(0x0, 0x1, 0x0)
  0x8200080 = __debugfs_file_get()
  __debugfs_file_get(0x0)
  0x0 = __debugfs_file_get()
  0x0 = trigger_write [eight_args_c]()
  trigger_write [eight_args_c](0x0, 0x1, 0x0)
    df_func2 [eight_args_c](0x11, 0x22)
    0x33 = df_func2 [eight_args_c]()
    df_func3 [eight_args_c](0x11, 0x22, 0x33)
    0x66 = df_func3 [eight_args_c]()
    df_func4 [eight_args_c](0x11, 0x22, 0x33, 0x44)
    0xaa = df_func4 [eight_args_c]()
    df_func5 [eight_args_c](0x11, 0x22, 0x33, 0x44, 0x55)
    0xff = df_func5 [eight_args_c]()
    df_func6 [eight_args_c](0x11, 0x22, 0x33, 0x44, 0x55, 0x66)
    0x165 = df_func6 [eight_args_c]()
    df_func7 [eight_args_c](0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77)
    0x1dc = df_func7 [eight_args_c]()
    df_func8 [eight_args_c](0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88)
    0x264 = df_func8 [eight_args_c]()
    df_func_struct [eight_args_c](0xaaaa)
    0x16665 = df_func_struct [eight_args_c]()
  0x1 = trigger_write [eight_args_c]()
  0x1 = full_proxy_write()
  0x1 = vfs_write()
  0x1 = ksys_write()
  0x1 = __x64_sys_write()
  0x0 = fpregs_assert_state_consistent()
  0xba5748 = __x64_sys_close()
  file_close_fd(0x4)
  0x0 = file_close_fd()

For the corresponding Rust kernel module example, eight_args_rust:

  ksys_write(0x0, 0x1)
    fdget_pos(0x4)
    0xffff891481d2bc00 = fdget_pos()
  0x0 = vfs_write()
  vfs_write(0x0, 0x1, 0x0)
  0x0 = _RNvCs3p16QzTwthP_15eight_args_rust13write_handler [eight_args_rust]()
  _RNvCs3p16QzTwthP_15eight_args_rust13write_handler [eight_args_rust](0x0, 0x1, 0x0)
    rdf_func2 [eight_args_rust](0x11, 0x22)
    0x33 = rdf_func2 [eight_args_rust]()
    rdf_func3 [eight_args_rust](0x11, 0x22, 0x33)
    0x66 = rdf_func3 [eight_args_rust]()
    rdf_func4 [eight_args_rust](0x11, 0x22, 0x33, 0x44)
    0xaa = rdf_func4 [eight_args_rust]()
    rdf_func5 [eight_args_rust](0x11, 0x22, 0x33, 0x44, 0x55)
    0xff = rdf_func5 [eight_args_rust]()
    rdf_func6 [eight_args_rust](0x11, 0x22, 0x33, 0x44, 0x55, 0x66)
    0x165 = rdf_func6 [eight_args_rust]()
    rdf_func7 [eight_args_rust](0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77)
    0x1dc = rdf_func7 [eight_args_rust]()
    rdf_func8 [eight_args_rust](0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88)
    0x264 = rdf_func8 [eight_args_rust]()
    rdf_func_struct [eight_args_rust](0xaaaa)
    0x16665 = rdf_func_struct [eight_args_rust]()
  0x1 = _RNvCs3p16QzTwthP_15eight_args_rust13write_handler [eight_args_rust]()
  0x1 = vfs_write()
  0x1 = ksys_write()
  0x1 = __x64_sys_write()
  0x0 = fpregs_assert_state_consistent()
  0xba5748 = __x64_sys_close()
  file_close_fd(0x4)
  0x0 = file_close_fd()
  0x0 = filp_flush()

Design
======

- Independent from existing /sys/kernel/debug/kcov
- Separate device: /sys/kernel/debug/kcov_dataflow
- Separate ioctl namespace ('d'), separate per-task buffer
- Lock-free write path: READ_ONCE/WRITE_ONCE (Tested on x86_64/arm64)
- Safe pointer reads: copy_from_kernel_nofault()
- in_task() guard rejects interrupt/NMI context
- Per-module opt-in: KCOV_DATAFLOW_file.o := y
- Optional global: CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL
- Compiler flags: -fsanitize-coverage=trace-args,trace-ret
  (Kconfig uses cc-option to verify compiler support)

CI results:

  https://github.com/yskzalloc/kcov-dataflow/actions

Performance
===========

Per-module instrumentation (recording active):
  +8.3% on instrumented paths, ~27ns per callback

Global instrumentation (INSTRUMENT_ALL, recording disabled):
  .text: +9.5%, .data: +44%, boot: +71%, syscall latency: +133%

Prerequisites
=============

Requires custom LLVM/Clang with trace-args/trace-ret passes:

  git clone --recursive --depth 1 --shallow-submodules \
    --jobs $(nproc) https://github.com/yskzalloc/kcov-dataflow.git
  cd kcov-dataflow

  cd llvm-project
  cmake -S llvm -B build -G Ninja \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_C_COMPILER=clang \
    -DCMAKE_CXX_COMPILER=clang++ \
    -DLLVM_ENABLE_LLD=ON \
    -DLLVM_ENABLE_PROJECTS="clang;lld" \
    -DLLVM_TARGETS_TO_BUILD="X86;AArch64"
  ninja -C build
  cd ..

Build and boot kernel (using virtme-ng):

  export PATH=$PWD/llvm-project/build/bin:$PATH
  export RUSTC=$PWD/rust/build/x86_64-unknown-linux-gnu/stage1/bin/rustc
  export RUST_LIB_SRC=$PWD/rust/library
  cd linux
  vng --build \
    --configitem CONFIG_KCOV=y \
    --configitem CONFIG_KCOV_DATAFLOW_ARGS=y \
    --configitem CONFIG_KCOV_DATAFLOW_RET=y \
    --configitem CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL=y \
    --configitem CONFIG_DEBUG_INFO=y \
    --configitem CONFIG_RUST=y \
    LLVM=1 CC=clang RUSTC=$RUSTC RUST_LIB_SRC=$RUST_LIB_SRC

  (CONFIG_RUST=y is for Rust kernel tracking.)

Or without virtme-ng:

  cd linux
  make LLVM=1 CC=clang defconfig
  scripts/config --enable KCOV \
                 --enable KCOV_DATAFLOW_ARGS \
                 --enable KCOV_DATAFLOW_RET \
                 --enable KCOV_DATAFLOW_INSTRUMENT_ALL \
                 --enable DEBUG_INFO
  make LLVM=1 CC=clang olddefconfig
  make LLVM=1 CC=clang -j$(nproc)

For Rust module support, build rustc against the custom LLVM:

  https://github.com/yskzalloc/rust

Testing
=======

Tested on linux-next 7.1.0-rc6 (next-20260608) with custom clang/LLVM 23
and rustc 1.98-nightly. Verified on both x86_64 and arm64:

- user_ioctl: 9/9 tests pass (ioctl interface correctness: init, mmap,
  enable/disable, double-enable rejection, buffer capture verification)
- eight_args_c: nested call tree with df_func2..8 + struct (65 context records)
- eight_args_rust: nested call tree with rdf_func2..8 + struct (65 context records)
- rust_ffi_contract: detects FFI contract violation where callee returns
  success (0) but leaves buffer=NULL - captured without crash or KASAN
- binderfs: exercises binder driver via binderfs ioctls (BINDER_VERSION,
  BINDER_SET_MAX_THREADS) with kcov_dataflow recording active, verifies
  argument records captured at binder ioctl boundaries

Links
=====

[1] LLVM RFC: https://discourse.llvm.org/t/rfc-sanitizercoverage-add-fsanitize-coverage-trace-args-trace-ret/91026
[2] LLVM PR: https://github.com/llvm/llvm-project/pull/201410
[3] Repository: https://github.com/yskzalloc/kcov-dataflow
[4] Paper: https://arxiv.org/pdf/2606.00455

---
Change log:

Changes since v1 (https://lore.kernel.org/all/20260603-kcov-dataflow-next-20260603-v2-0-fee0939de2c4@est.tech/):
- Separate from /sys/kernel/debug/kcov (own device, own ioctl namespace)
- Rename internal symbols to avoid collision with existing kcov
- Add CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL for whole-kernel capture
- Fix INIT_TRACK race, fork cleanup, task exit cleanup
- Add recursion guard barriers
- Reject concurrent enable on multiple fds
- Move from tools to kselftest adding:
  user_ioctl, eight_args_c, eight_args_rust, rust_ffi_contract, binderfs_test
- Separate patch regarding kcov-dataflow Documentation

To: Ingo Molnar <mingo@redhat.com>
To: Peter Zijlstra <peterz@infradead.org>
To: Juri Lelli <juri.lelli@redhat.com>
To: Vincent Guittot <vincent.guittot@linaro.org>
To: Dietmar Eggemann <dietmar.eggemann@arm.com>
To: Steven Rostedt <rostedt@goodmis.org>
To: Ben Segall <bsegall@google.com>
To: Mel Gorman <mgorman@suse.de>
To: Valentin Schneider <vschneid@redhat.com>
To: K Prateek Nayak <kprateek.nayak@amd.com>
To: Andrey Konovalov <andreyknvl@gmail.com>
To: Alexander Potapenko <glider@google.com>
To: Dmitry Vyukov <dvyukov@google.com>
To: Andrew Morton <akpm@linux-foundation.org>
To: Miguel Ojeda <ojeda@kernel.org>
To: Boqun Feng <boqun@kernel.org>
To: Gary Guo <gary@garyguo.net>
To: Björn Roy Baron <bjorn3_gh@protonmail.com>
To: Benno Lossin <lossin@kernel.org>
To: Andreas Hindborg <a.hindborg@kernel.org>
To: Alice Ryhl <aliceryhl@google.com>
To: Trevor Gross <tmgross@umich.edu>
To: Danilo Krummrich <dakr@kernel.org>
To: Nathan Chancellor <nathan@kernel.org>
To: Nicolas Schier <nsc@kernel.org>
To: Nick Desaulniers <nick.desaulniers+lkml@gmail.com>
To: Bill Wendling <morbo@google.com>
To: Justin Stitt <justinstitt@google.com>
To: Kees Cook <kees@kernel.org>
To: David Hildenbrand <david@kernel.org>
To: Lorenzo Stoakes <ljs@kernel.org>
To: "Liam R. Howlett" <liam@infradead.org>
To: Vlastimil Babka <vbabka@kernel.org>
To: Mike Rapoport <rppt@kernel.org>
To: Suren Baghdasaryan <surenb@google.com>
To: Michal Hocko <mhocko@suse.com>
To: Shuah Khan <shuah@kernel.org>
To: Jonathan Corbet <corbet@lwn.net>
To: Shuah Khan <skhan@linuxfoundation.org>
Cc: linux-kernel@vger.kernel.org
Cc: kasan-dev@googlegroups.com
Cc: rust-for-linux@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: llvm@lists.linux.dev
Cc: linux-mm@kvack.org
Cc: linux-kselftest@vger.kernel.org
Cc: workflows@vger.kernel.org
Cc: linux-doc@vger.kernel.org

---
Yunseong Kim (14):
      kcov: add per-task dataflow tracking for function arguments/return values
      kcov: fix INIT_TRACK race in kcov_dataflow
      kcov: add barriers to recursion guard in kcov_df_write
      kcov: reject enable on multiple dataflow fds simultaneously
      kcov: clear dataflow fields on fork
      kcov: clean up dataflow state on task exit
      kcov: exclude kcov_dataflow.o from sanitizer instrumentation
      selftests/kcov_dataflow: add trigger-view.py
      selftests/kcov_dataflow: add ioctl interface selftest
      selftests/kcov_dataflow: add eight_args_c test module
      selftests/kcov_dataflow: add eight_args_rust test module
      selftests/kcov_dataflow: add rust_ffi_contract test module
      selftests/kcov_dataflow: add binderfs ioctl capture test
      Documentation: add kcov-dataflow.rst

 Documentation/dev-tools/index.rst                  |   1 +
 Documentation/dev-tools/kcov-dataflow.rst          | 321 ++++++++++++++++++
 include/linux/kcov.h                               |   8 +
 include/linux/sched.h                              |  10 +
 kernel/Makefile                                    |   9 +
 kernel/exit.c                                      |   1 +
 kernel/fork.c                                      |   1 +
 kernel/kcov.c                                      |   2 +
 kernel/kcov_dataflow.c                             | 356 +++++++++++++++++++
 lib/Kconfig.debug                                  |  43 +++
 rust/kernel/str.rs                                 |   2 +-
 scripts/Makefile.kcov                              |  12 +
 scripts/Makefile.lib                               |   9 +
 tools/testing/selftests/kcov_dataflow/.gitignore   |   9 +
 tools/testing/selftests/kcov_dataflow/Makefile     |   4 +
 tools/testing/selftests/kcov_dataflow/README.rst   |  58 ++++
 .../selftests/kcov_dataflow/binderfs/Makefile      |   4 +
 .../kcov_dataflow/binderfs/binderfs_test.c         | 177 ++++++++++
 .../selftests/kcov_dataflow/eight_args_c/Makefile  |   3 +
 .../kcov_dataflow/eight_args_c/eight_args_c.c      |  95 ++++++
 .../kcov_dataflow/eight_args_rust/Makefile         |   3 +
 .../eight_args_rust/eight_args_rust.rs             | 143 ++++++++
 .../selftests/kcov_dataflow/run_binderfs.sh        |  13 +
 .../selftests/kcov_dataflow/run_eight_args_c.sh    |  35 ++
 .../selftests/kcov_dataflow/run_eight_args_rust.sh |  35 ++
 .../kcov_dataflow/run_rust_ffi_contract.sh         |  35 ++
 .../kcov_dataflow/rust_ffi_contract/Makefile       |   3 +
 .../rust_ffi_contract/rust_ffi_contract.c          | 111 ++++++
 .../selftests/kcov_dataflow/trigger-view.py        | 377 +++++++++++++++++++++
 .../kcov_dataflow/user_ioctl/user_ioctl.c          | 156 +++++++++
 30 files changed, 2035 insertions(+), 1 deletion(-)
---
base-commit: a87737435cfa134f9cdcc696ba3080759d04cf72
change-id: 20260611-b4-kcov-dataflow-v2-3ccff828eb31

Best regards,
--  
Yunseong Kim <yunseong.kim@est.tech>


^ permalink raw reply

* Re: [PATCH v2 0/4] mm: split the file's i_mmap tree for NUMA
From: Lorenzo Stoakes @ 2026-06-11 16:00 UTC (permalink / raw)
  To: Huang Shijie
  Cc: akpm, viro, brauner, jack, muchun.song, osalvador, david, surenb,
	mjguzik, liam, vbabka, shakeel.butt, rppt, mhocko, corbet, skhan,
	linux, dinguyen, schuster.simon, James.Bottomley, deller, djbw,
	willy, peterz, mingo, acme, namhyung, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, james.clark,
	mhiramat, oleg, ziy, baolin.wang, npache, ryan.roberts, dev.jain,
	baohua, lance.yang, linmiaohe, nao.horiguchi, jannh, pfalcato,
	riel, harry, will, brian.ruley, rmk+kernel, dave.anglin, linux-mm,
	linux-doc, linux-kernel, linux-arm-kernel, linux-parisc,
	linux-fsdevel, nvdimm, linux-perf-users, linux-trace-kernel,
	zhongyuan, fangbaoshun, yingzhiwei
In-Reply-To: <20260611061915.2354307-1-huangsj@hygon.cn>

Hi Huang,

You seem to be replacing the file rmap altogether here, so you really ought
to have sent this as an RFC so we could discuss it as a community first.

Especially so as Pedro had publicly mentioned his plans to implement
something similar here, so coordination would have been appreciated.

Anyway, as Pedro has pointed out, the code is overly complicated, it's far
too configurable (not always a good thing), and the locking implementation
is questionable.

You seem to be adding a whole bunch of open-coded complexity too, which is
not something we want. Abstraction is key for the rmap.

You're also not adding any kdoc comments or really many comments at all,
and you've not added any tests (though perhaps it's difficult given how
core this is).

So I would suggest that perhaps any respin should be sent as an RFC so we
can engage in that conversation and ensure we're all on the same page?

Especially since Pedro plans to send an alternative, simpler, solution I
believe.

It's also not helpful that you haven't examined the non-NUMA case :)
perhaps your particular server behaves a certain way that this approach
aids, but regresses other NUMA configurations?

We'd really need to be sure of this before accepting invasive changes like
this.

Thanks, Lorenzo

On Thu, Jun 11, 2026 at 02:18:56PM +0800, Huang Shijie wrote:
>   In NUMA, there are maybe many NUMA nodes and many CPUs.
> For example, a Hygon's server has 12 NUMA nodes, and 384 CPUs.
> In the UnixBench tests, there is a test "execl" which tests
> the execve system call.
>
>   When we test our server with "./Run -c 384 execl",
> the test result is not good enough. The i_mmap locks contended heavily on
> "libc.so" and "ld.so". For example, the i_mmap tree for "libc.so" can have
> over 6000 VMAs, all the VMAs can be in different NUMA mode.
> The insert/remove operations do not run quickly enough.

You really need to send detailed, statistically valid numbers across
different NUMA configurations for changes like this to be considered.

>
> patch 1 & patch 2 try to hide the direct access of i_mmap.
> patch 3 splits the i_mmap into sibling trees, each tree has separate lock,
> and we can get better performance with this patch set in our NUMA server:
>     we can get over 400% performance improvement.
>
> I did not test the non-NUMA case, since I do not have such server.

Yeah this isn't a great thing to hear :) you need to demonstrate this
doesn't regress non-NUMA machines or NUMA machines of a different
configuration.

>
> v1 --> v2:
> 	Not only split the immap tree, but also split the lock.
> 	v1 : https://lkml.org/lkml/2026/4/13/199
>
> Huang Shijie (4):
>   mm: use mapping_mapped to simplify the code
>   mm: use get_i_mmap_root to access the file's i_mmap
>   mm/fs: split the file's i_mmap tree
>   docs/mm: update document for split i_mmap tree
>
>  Documentation/mm/process_addrs.rst |  63 +++++++---
>  arch/arm/mm/fault-armv.c           |   3 +-
>  arch/arm/mm/flush.c                |   3 +-
>  arch/nios2/mm/cacheflush.c         |   3 +-
>  arch/parisc/kernel/cache.c         |   4 +-
>  fs/Kconfig                         |   8 ++
>  fs/dax.c                           |   3 +-
>  fs/hugetlbfs/inode.c               |  30 +++--
>  fs/inode.c                         |  75 +++++++++++-
>  include/linux/fs.h                 | 179 ++++++++++++++++++++++++++++-
>  include/linux/mm.h                 |  81 +++++++++++++
>  include/linux/mm_types.h           |   3 +
>  kernel/events/uprobes.c            |   3 +-
>  mm/hugetlb.c                       |   7 +-
>  mm/internal.h                      |   3 +-
>  mm/khugepaged.c                    |   6 +-
>  mm/memory-failure.c                |   8 +-
>  mm/memory.c                        |   8 +-
>  mm/mmap.c                          |  11 +-
>  mm/nommu.c                         |  28 +++--
>  mm/pagewalk.c                      |   4 +-
>  mm/rmap.c                          |   2 +-
>  mm/vma.c                           |  74 +++++++++---
>  mm/vma_init.c                      |   3 +
>  24 files changed, 534 insertions(+), 78 deletions(-)

This is a _lot_ of changes you're making here. It therefore feels like the
abstraction is broken somewhat?

>
> --
> 2.53.0
>
>

Thanks, Lorenzo

^ permalink raw reply

* Re: [PATCH v3] arm64: errata: Workaround NVIDIA Olympus device store/load ordering erratum
From: Shanker Donthineni @ 2026-06-11 16:00 UTC (permalink / raw)
  To: Vladimir Murzin, Will Deacon
  Cc: Catalin Marinas, Jason Gunthorpe, linux-arm-kernel, Mark Rutland,
	linux-kernel, linux-doc, Vikram Sethi, Jason Sequeira
In-Reply-To: <aee00047-81b9-4562-be47-500b2643f7f6@arm.com>

Hi Vladimir,

On 6/11/2026 10:08 AM, Vladimir Murzin wrote:
> External email: Use caution opening links or attachments
>
>
> Hi,
>
> On 6/11/26 14:34, Will Deacon wrote:
>> On Wed, Jun 10, 2026 at 11:48:22AM -0500, Shanker Donthineni wrote:
>>> On systems with NVIDIA Olympus cores, a Device-nGnR* load can be
>>> observed by a peripheral before an older, non-overlapping Device-nGnR*
>>> store to the same peripheral. This breaks the program-order guarantee
>>> that software expects for Device-nGnR* accesses and can leave a
>>> peripheral in an incorrect state, as a load is observed before an
>>> earlier store takes effect.
>>>
>>> The erratum can occur only when all of the following apply:
>>>
>>>    - A PE executes a Device-nGnR* store followed by a younger
>>>      Device-nGnR* load.
>>>    - The store is not a store-release.
>>>    - The accesses target the same peripheral and do not overlap in bytes.
>>>    - There is at most one intervening Device-nGnR* store in program
>>>      order, and there are no intervening Device-nGnR* loads.
>>>    - There is no DSB, and no DMB that orders loads, between the store and
>>>      the load.
>>>    - Specific micro-architectural and timing conditions occur.
>>>
>>> Promote the raw MMIO store helpers (__raw_writeb/w/l/q) from plain str*
>>> to stlr* (Store-Release), which removes the "store is not a
>>> store-release" condition for every device write the kernel issues.
>>> Because writel() and writel_relaxed() are both built on __raw_writel()
>>> in asm-generic/io.h, patching the raw variants covers both the
>>> non-relaxed and relaxed APIs without touching the higher layers. Note
>>> that writel()'s own barrier sits before the store, so it does not order
>>> the store against a subsequent readl(); the store-release promotion is
>>> what provides that ordering.
>>>
>>> Like ARM64_ERRATUM_832075 on the load side, the change is gated on a new
>>> ARM64_WORKAROUND_DEVICE_STORE_RELEASE capability and only activated on
>>> parts that match MIDR_NVIDIA_OLYMPUS, so unaffected CPUs continue to use
>>> the plain str* sequence.
>>>
>>> Note: stlr* only supports base-register addressing, so affected CPUs use
>>> a base-register stlr* path. Unaffected CPUs keep the original
>>> offset-addressed str* sequence introduced by commit d044d6ba6f02
>>> ("arm64: io: permit offset addressing").
>>>
>>> The __const_memcpy_toio_aligned32() and __const_memcpy_toio_aligned64()
>>> helpers are left unchanged. These helpers are intended for
>>> write-combining mappings, which are Normal-NC on arm64. Replacing their
>>> contiguous str* groups would defeat the write-combining behavior used to
>>> improve store performance.
>>>
>>> Co-developed-by: Vikram Sethi <vsethi@nvidia.com>
>>> Signed-off-by: Vikram Sethi <vsethi@nvidia.com>
>>> Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
>>> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
>>> ---
>>> Changes since v2:
>>>    - Reworked the raw MMIO write helpers so unaffected CPUs keep the
>>>      existing offset-addressed STR sequence, while affected CPUs use the
>>>      base-register STLR path.
>>>    - Updated the commit message to match the code changes.
>>>    - Rebased on top of the arm64 for-next/errata branch:
>>>      https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git/log/?h=for-next/errata
>>>
>>> Changes since v1:
>>>    - Updated the commit message based on feedback from Vladimir Murzin.
>>>
>>>   Documentation/arch/arm64/silicon-errata.rst |  2 ++
>>>   arch/arm64/Kconfig                          | 23 ++++++++++++++++
>>>   arch/arm64/include/asm/io.h                 | 30 +++++++++++++++++++++
>>>   arch/arm64/kernel/cpu_errata.c              |  8 ++++++
>>>   arch/arm64/tools/cpucaps                    |  1 +
>>>   5 files changed, 64 insertions(+)
>>>
>>> diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
>>> index ad09bbb10da80..fc45125dc2f80 100644
>>> --- a/Documentation/arch/arm64/silicon-errata.rst
>>> +++ b/Documentation/arch/arm64/silicon-errata.rst
>>> @@ -298,6 +298,8 @@ stable kernels.
>>>   +----------------+-----------------+-----------------+-----------------------------+
>>>   | NVIDIA         | Carmel Core     | N/A             | NVIDIA_CARMEL_CNP_ERRATUM   |
>>>   +----------------+-----------------+-----------------+-----------------------------+
>>> +| NVIDIA         | Olympus core    | T410-OLY-1027   | NVIDIA_OLYMPUS_1027_ERRATUM |
>>> ++----------------+-----------------+-----------------+-----------------------------+
>>>   | NVIDIA         | Olympus core    | T410-OLY-1029   | ARM64_ERRATUM_4118414       |
>>>   +----------------+-----------------+-----------------+-----------------------------+
>>>   | NVIDIA         | T241 GICv3/4.x  | T241-FABRIC-4   | N/A                         |
>>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>>> index c65cef81be86a..d633eb70de1ac 100644
>>> --- a/arch/arm64/Kconfig
>>> +++ b/arch/arm64/Kconfig
>>> @@ -564,6 +564,29 @@ config ARM64_ERRATUM_832075
>>>
>>>         If unsure, say Y.
>>>
>>> +config NVIDIA_OLYMPUS_1027_ERRATUM
>>> +    bool "NVIDIA Olympus: device store/load ordering erratum"
>>> +    default y
>>> +    help
>>> +      This option adds an alternative code sequence to work around an
>>> +      NVIDIA Olympus core erratum where a Device-nGnR* store can be
>>> +      observed by a peripheral after a younger Device-nGnR* load to the
>>> +      same peripheral. This breaks the program order that drivers rely
>>> +      on for MMIO and can leave a device in an incorrect state.
>>> +
>>> +      The workaround promotes the raw MMIO store helpers
>>> +      (__raw_writeb/w/l/q) to Store-Release (STLR), which restores the
>>> +      required ordering. Because writel() and writel_relaxed() are built
>>> +      on __raw_writel(), both are covered without changes to the higher
>>> +      layers.
>>> +
>>> +      The fix is applied through the alternatives framework, so enabling
>>> +      this option does not by itself activate the workaround: it is
>>> +      patched in only when an affected CPU is detected, and is a no-op on
>>> +      unaffected CPUs.
>>> +
>>> +      If unsure, say Y.
>>> +
>>>   config ARM64_ERRATUM_834220
>>>       bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault (rare)"
>>>       depends on KVM
>>> diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
>>> index 8cbd1e96fd50b..801223e754c90 100644
>>> --- a/arch/arm64/include/asm/io.h
>>> +++ b/arch/arm64/include/asm/io.h
>>> @@ -22,10 +22,22 @@
>>>   /*
>>>    * Generic IO read/write.  These perform native-endian accesses.
>>>    */
>>> +static __always_inline bool arm64_needs_device_store_release(void)
>>> +{
>>> +    return alternative_has_cap_unlikely(
>>> +                            ARM64_WORKAROUND_DEVICE_STORE_RELEASE);
>>> +}
>>> +
>>>   #define __raw_writeb __raw_writeb
>>>   static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr)
>>>   {
>>>       volatile u8 __iomem *ptr = addr;
>>> +
>>> +    if (arm64_needs_device_store_release()) {
>>> +            asm volatile("stlrb %w0, [%1]" : : "rZ" (val), "r" (addr));
>>> +            return;
>>> +    }
>>> +
>>>       asm volatile("strb %w0, %1" : : "rZ" (val), "Qo" (*ptr));
>>>   }
>> Use an 'else' clause instead of the early return? (similarly for the other
>> changes).
> Perhaps I'm missing something, but it is not clear to me why all that
> complexity is required.
>
> IIUC, benefits coming with d044d6ba6f02 ("arm64: io: permit offset
> addressing") are from better code generation, so we:
>   - save code
>   - open opportunity for  write-combining
>
> d044d6ba6f02 ("arm64: io: permit offset addressing") comes with simple
> benchmark to measure effect of code generation:
>
> | void writeq_zero_8_times(void *ptr)
> | {
> |        writeq_relaxed(0, ptr + 8 * 0);
> |        writeq_relaxed(0, ptr + 8 * 1);
> |        writeq_relaxed(0, ptr + 8 * 2);
> |        writeq_relaxed(0, ptr + 8 * 3);
> |        writeq_relaxed(0, ptr + 8 * 4);
> |        writeq_relaxed(0, ptr + 8 * 5);
> |        writeq_relaxed(0, ptr + 8 * 6);
> |        writeq_relaxed(0, ptr + 8 * 7);
> | }
>
> which compiles to
>
> | <writeq_zero_8_times>:
> |        str     xzr, [x0]
> |        str     xzr, [x0, #8]
> |        str     xzr, [x0, #16]
> |        str     xzr, [x0, #24]
> |        str     xzr, [x0, #32]
> |        str     xzr, [x0, #40]
> |        str     xzr, [x0, #48]
> |        str     xzr, [x0, #56]
>
>
> v1/v2 compiles to
>
> | <writeq_zero_8_times>:
> |        str     xzr, [x0]
> |        add     x1, x0, #0x8
> |        str     xzr, [x1]
> |        add     x1, x0, #0x10
> |        str     xzr, [x1]
> |        add     x1, x0, #0x18
> |        str     xzr, [x1]
> |        add     x1, x0, #0x20
> |        str     xzr, [x1]
> |        add     x1, x0, #0x28
> |        str     xzr, [x1]
> |        add     x1, x0, #0x30
> |        str     xzr, [x1]
> |        add     x0, x0, #0x38
> |        str     xzr, [x0]
>
> where alternatives are swapping str with stlr. In other words, we are
> rolling back to the pre-d044d6ba6f02 implementation.
>
> v3 compiles to:
>
> | <writeq_zero_8_times>:
> |        nop
> |        str     xzr, [x0]
> |        add     x1, x0, #0x8
> |        nop
> |        str     xzr, [x1]
> |        add     x1, x0, #0x10
> |        nop
> |        str     xzr, [x1]
> |        add     x1, x0, #0x18
> |        nop
> |        str     xzr, [x1]
> |        add     x1, x0, #0x20
> |        nop
> |        str     xzr, [x1]
> |        add     x1, x0, #0x28
> |        nop
> |        str     xzr, [x1]
> |        add     x1, x0, #0x30
> |        nop
> |        str     xzr, [x1]
> |        add     x0, x0, #0x38
> |        nop
> |        str     xzr, [x0]
> |        ret
>
> where static branch swapping nop with branch to stlr and back to add.
>
> So it looks to me that we're losing an opportunity for write
> combining, but in terms of code size, v1/v2 seems to be the lesser of
> two evils.

Thanks, that makes sense.

My intent with the v3 change was to keep the offset-addressed STR sequence on
unaffected CPUs and use the base-register STLR sequence only on affected CPUs.
However, as you point out, because STLR only supports base-register addressing,
the affected path still forces the address to be materialized in a register, and
the alternative_has_cap_unlikely() check adds another instruction at each write
site. So the generated code no longer preserves the benefit from d044d6ba6f02 in
practice.

Given that, I agree the extra complexity is not justified. I’ll simplify the raw
MMIO write helpers back to the direct ALTERNATIVE() form from v1/v2, where both
the STR and STLR paths use base-register addressing. That is still a regression
from the offset-addressed STR sequence on unaffected CPUs, but it avoids the
additional static-branch/nop overhead and is the smaller of the two options.

-Shanker


^ permalink raw reply

* Re: [PATCH net-next V3 2/7] netdevsim: Register devlink after device init
From: Jakub Kicinski @ 2026-06-11 15:54 UTC (permalink / raw)
  To: Mark Bloch
  Cc: Eric Dumazet, Paolo Abeni, Andrew Lunn, David S. Miller,
	Jonathan Corbet, Shuah Khan, Jiri Pirko, Simon Horman,
	Sunil Goutham, Linu Cherian, Geetha sowjanya, hariprasad,
	Subbaraya Sundeep, Bharat Bhushan, Saeed Mahameed,
	Leon Romanovsky, Tariq Toukan, Ethan Nelson-Moore, linux-doc,
	netdev, linux-rdma
In-Reply-To: <eb525345-da07-414c-9d05-7e00e3eb472f@nvidia.com>

On Thu, 11 Jun 2026 09:02:03 +0300 Mark Bloch wrote:
> On 11/06/2026 2:50, Jakub Kicinski wrote:
> > On Fri, 5 Jun 2026 21:10:25 +0300 Mark Bloch wrote:  
> >> devl_register() makes the devlink instance visible to userspace. A later
> >> patch also makes registration the point where devlink core may call
> >> eswitch_mode_set() to apply a boot-time default eswitch mode.
> >>
> >> Move netdevsim registration after all objects (resources, params, regions,
> >> traps, debugfs etc) are initialized, and after the initial eswitch mode is
> >> set to legacy.
> >>
> >> Move devl_unregister() to the beginning of nsim_drv_remove(), before those
> >> devlink objects are torn down. This keeps devlink register/unregister as
> >> the notification barrier and makes the later object teardown paths run
> >> after devlink is no longer registered, so they do not emit their own
> >> netlink DEL notifications.  
> > 
> > This is going backwards. At some point someone from nVidia thought that
> > we can order our way out of locking, so mlx5 is likely ordered this way,
> > but this must not be required, or in any way normalized.
> > We (syzbot) quickly discovered that it doesn't cover all corner cases.
> > devl_lock() is exposed specifically to allow the driver to finish
> > whatever init it needs without letting user space invoke callbacks, yet.
> > Almost (?) all driver callbacks hold devl_lock(), so maybe the devlink
> > instance is "visible" to user space but that should not matter.  
> 
> Let me clarify.
> 
> No locking is changed here, and I don't want to make register/unregister
> ordering a substitute for devl_lock().
> 
> The only requirement I have for this series is that devl_register() is called
> only once the driver is ready for devlink core to call eswitch_mode_set().
> That follows from the earlier direction to have the core apply the default
> mode from devl_register() instead of adding an explicit driver call.

This is exactly what I'm objecting to. AFAIU we are trading off
explicit call to get the default value for an implicit behavior
depending on order of calls. We want to optimize for how easy it
is to get the API wrong, not for LoC.

If we don't have a clean way to implement this without driver
changes let's add the explicit API to get the default value.
If driver doesn't call it schedule a work to go via the callback
once devl_lock() is dropped. That way drivers which care can optimize
themselves by reading the default value upfront. Drivers which don't 
care will work correctly, and there's no API call order trap.

Not ideal, but isn't that the best we can do here?
I still have flashbacks of the fallout from the call ordering games, 
we have too many drivers to keep this straight...

> So if the objection is to the commit message wording, I can fix that and drop
> the "notification barrier" language.
> 
> For unregister, I can probably leave the old ordering as-is. I moved it only
> to mirror the register path, which felt cleaner, but it is not required for
> the default-mode change and as the lock is held I see no issue with doing
> that.

^ permalink raw reply

* Re: [PATCH v2 1/4] mm: use mapping_mapped to simplify the code
From: Lorenzo Stoakes @ 2026-06-11 15:52 UTC (permalink / raw)
  To: Huang Shijie
  Cc: akpm, viro, brauner, jack, muchun.song, osalvador, david, surenb,
	mjguzik, liam, vbabka, shakeel.butt, rppt, mhocko, corbet, skhan,
	linux, dinguyen, schuster.simon, James.Bottomley, deller, djbw,
	willy, peterz, mingo, acme, namhyung, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, james.clark,
	mhiramat, oleg, ziy, baolin.wang, npache, ryan.roberts, dev.jain,
	baohua, lance.yang, linmiaohe, nao.horiguchi, jannh, pfalcato,
	riel, harry, will, brian.ruley, rmk+kernel, dave.anglin, linux-mm,
	linux-doc, linux-kernel, linux-arm-kernel, linux-parisc,
	linux-fsdevel, nvdimm, linux-perf-users, linux-trace-kernel,
	zhongyuan, fangbaoshun, yingzhiwei
In-Reply-To: <20260611061915.2354307-2-huangsj@hygon.cn>

On Thu, Jun 11, 2026 at 02:18:57PM +0800, Huang Shijie wrote:
> Use mapping_mapped() to simplify the code, make
> the code tidy and clean.
>
> Signed-off-by: Huang Shijie <huangsj@hygon.cn>

Yeah as Pedro said this one could just be sent separately, and I in fact
suggest you do that :) So:

Reviewed-by: Lorenzo Stoakes <ljs@kernel.org>

Cheers, Lorenzo

> ---
>  fs/hugetlbfs/inode.c | 4 ++--
>  mm/memory.c          | 4 ++--
>  2 files changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
> index 78d61bf2bd9b..216e1a0dd0b2 100644
> --- a/fs/hugetlbfs/inode.c
> +++ b/fs/hugetlbfs/inode.c
> @@ -614,7 +614,7 @@ static void hugetlb_vmtruncate(struct inode *inode, loff_t offset)
>
>  	i_size_write(inode, offset);
>  	i_mmap_lock_write(mapping);
> -	if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
> +	if (mapping_mapped(mapping))
>  		hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0,
>  				      ZAP_FLAG_DROP_MARKER);
>  	i_mmap_unlock_write(mapping);
> @@ -675,7 +675,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>
>  	/* Unmap users of full pages in the hole. */
>  	if (hole_end > hole_start) {
> -		if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
> +		if (mapping_mapped(mapping))
>  			hugetlb_vmdelete_list(&mapping->i_mmap,
>  					      hole_start >> PAGE_SHIFT,
>  					      hole_end >> PAGE_SHIFT, 0);
> diff --git a/mm/memory.c b/mm/memory.c
> index 86a973119bd4..5335077765e2 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -4386,7 +4386,7 @@ void unmap_mapping_folio(struct folio *folio)
>  	details.zap_flags = ZAP_FLAG_DROP_MARKER;
>
>  	i_mmap_lock_read(mapping);
> -	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
> +	if (unlikely(mapping_mapped(mapping)))
>  		unmap_mapping_range_tree(&mapping->i_mmap, first_index,
>  					 last_index, &details);
>  	i_mmap_unlock_read(mapping);
> @@ -4416,7 +4416,7 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
>  		last_index = ULONG_MAX;
>
>  	i_mmap_lock_read(mapping);
> -	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
> +	if (unlikely(mapping_mapped(mapping)))
>  		unmap_mapping_range_tree(&mapping->i_mmap, first_index,
>  					 last_index, &details);
>  	i_mmap_unlock_read(mapping);
> --
> 2.53.0
>
>

^ permalink raw reply

* Re: [PATCH net-next v4 0/3] Add standard stats for HSR/PRP
From: Andrew Lunn @ 2026-06-11 15:50 UTC (permalink / raw)
  To: MD Danish Anwar
  Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Simon Horman, Jonathan Corbet, Shuah Khan, Roger Quadros,
	Andrew Lunn, Meghana Malladi, Jacob Keller, David Carlier,
	Vadim Fedorenko, Kevin Hao, Markus Elfring, Hangbin Liu,
	Fernando Fernandez Mancera, Jan Vaclav, netdev, linux-doc,
	linux-kernel, linux-arm-kernel, Felix Maurer, Luka Gejak
In-Reply-To: <20260611095035.852370-1-danishanwar@ti.com>

On Thu, Jun 11, 2026 at 03:20:32PM +0530, MD Danish Anwar wrote:
> Add standard stats for HSR / PRP. This series was initially adding HSR/PRP
> related stats for ICSSG driver. Based on maintainers' comments on v2 I am
> now adding support to dump standard stats for HSR/PRP.
> 
> The drivers which support offload can populate these standard stats.
> 
> This series only implements offloaded stats. For software-only interfaces
> Felix Maurer had said he will do it later [1]

That is ideally the wrong way around. Offloading is used to accelerate
what Linux can already do in software. Statistics should be part of
this, you first define software statistics, and then get the hardware
to report those.

So please get the software statistics merged first.

   Andrew

^ permalink raw reply

* Re: [PATCH v2 3/4] mm/fs: split the file's i_mmap tree
From: Lorenzo Stoakes @ 2026-06-11 15:48 UTC (permalink / raw)
  To: Huang Shijie
  Cc: Pedro Falcato, akpm, viro, brauner, jack, muchun.song, osalvador,
	david, surenb, mjguzik, liam, vbabka, shakeel.butt, rppt, mhocko,
	corbet, skhan, linux, dinguyen, schuster.simon, James.Bottomley,
	deller, djbw, willy, peterz, mingo, acme, namhyung, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, james.clark,
	mhiramat, oleg, ziy, baolin.wang, npache, ryan.roberts, dev.jain,
	baohua, lance.yang, linmiaohe, nao.horiguchi, jannh, riel, harry,
	will, brian.ruley, rmk+kernel, dave.anglin, linux-mm, linux-doc,
	linux-kernel, linux-arm-kernel, linux-parisc, linux-fsdevel,
	nvdimm, linux-perf-users, linux-trace-kernel, zhongyuan,
	fangbaoshun, yingzhiwei
In-Reply-To: <aiqFgGbIo1Psy3pI@pedro-suse.lan>

On Thu, Jun 11, 2026 at 12:11:27PM +0100, Pedro Falcato wrote:
> Hi,
>
> On Thu, Jun 11, 2026 at 02:18:59PM +0800, Huang Shijie wrote:
> > In the UnixBench tests, there is a test "execl" which tests
> > the execve system call.
> >   For example, a Hygon's server has 12 NUMA nodes, and 384 CPUs.
> > When we test our server with "./Run -c 384 execl",
> > the test result is not good enough. The i_mmap locks contended heavily on
> > "libc.so" and "ld.so". The i_mmap tree for "libc.so" can be
> > over 6000 VMAs, all the VMAs can be in different NUMA nodes. The insert/remove
> > operations do not run quickly enough.
>
> I _really_ would have appreciated some coordination here, because I said I was
> going to take a look at it. I have something that I think is much simpler

Agreed, this is the second (or in fact third?) time in recent weeks that
I'm aware of where publicly discussed work has been duplicated with a
series that came in later.

It's really important, when doing work that impacts core stuff to have a
look around and see if others are looking at it, as there's nothing more
frustrating than to work on something, discuss it publicly, only to find
somebody sends a competing series.

It can be tricky, as sometimes it's not obvious, or it might not be so
easily found, but I would strongly suggest always making an effort on that
front.

But you didn't even try to send this as an RFC either :)

> in practice. These patches are also way too complex to be dropped just before
> the merge window.

This late in the cycle means -> next cycle. So you'd have needed to resend
it at rc1 in a couple weeks anyway.

>
> Some comments:
>
> >
> >  In order to reduce the competition of the i_mmap lock, this patch does
> > following:
> >    1.) Split the single i_mmap tree into several sibling trees:
> >        Each tree has a lock. The CONFIG_SPLIT_I_MMAP is used to
> >        turn on/off this feature.
>
> There is no need for a config option. This needs to Just Work.

Yeah, this is just a no-go. We don't add config options for changes to core
rmap code.

>
> >    2.) Introduce a new field "tree_idx" for vm_area_struct to save the
> >        sibling tree index for this VMA.
>
> This is possibly contentious, but there are holes in vm_area_struct.
> So I think this is fine.

Yeah no thanks for the extra field, I already have plans for those gaps in
vm_area_struct.

I am in fact writing code right now that uses them...

>
> >    3.) Introduce a new field "vma_count" for address_space.
> >        The new mapping_mapped() will use it.
> >    4.) Rewrite the vma_interval_tree_foreach()

I also intend to send a series that does a bunch of changes in the rmap
code that this would conflict with.

So let's all coordinate please.

> >    5.) Rewrite the lock functions.

Yeah looping on file rmap lock/unlock is gross.

> >
> >  After this patch, the VMA insert/remove operations will work faster,
> > and we can get over 400% performance improvement with the above test.
> >
> > Signed-off-by: Huang Shijie <huangsj@hygon.cn>

I had a look through and this code is really overwrought and you're putting
a bunch of confusing open-coded logic all over the codebase without comments.

This isn't upstreamable quality and you really should have sent this as an
RFC first so we could discuss the approach.

Thanks, Lorenzo

^ permalink raw reply

* Re: [PATCH v7 00/42] guest_memfd: In-place conversion support
From: Sean Christopherson @ 2026-06-11 15:46 UTC (permalink / raw)
  To: Ackerley Tng
  Cc: Ackerley Tng via B4 Relay, aik, andrew.jones, binbin.wu, brauner,
	chao.p.peng, david, ira.weiny, jmattson, jthoughton, michael.roth,
	oupton, pankaj.gupta, qperret, rick.p.edgecombe, rientjes,
	shivankg, steven.price, tabba, willy, wyihan, yan.y.zhao,
	forkloop, pratyush, suzuki.poulose, aneesh.kumar, liam,
	Paolo Bonzini, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	Dave Hansen, x86, H. Peter Anvin, Steven Rostedt,
	Masami Hiramatsu, Mathieu Desnoyers, Jonathan Corbet, Shuah Khan,
	Shuah Khan, Vishal Annapurve, Andrew Morton, Chris Li,
	Kairui Song, Kemeng Shi, Nhat Pham, Baoquan He, Barry Song,
	Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park, Qi Zheng,
	Shakeel Butt, Kiryl Shutsemau, Jason Gunthorpe, Vlastimil Babka,
	kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
	linux-mm, linux-coco
In-Reply-To: <CAEvNRgF31BzyFyVUa7tDJ=qJ-8ws2kxfNjLxmV=OxKSqhaOiPw@mail.gmail.com>

On Wed, Jun 10, 2026, Ackerley Tng wrote:
> Sean Christopherson <seanjc@google.com> writes:
> 
> > On Thu, Jun 04, 2026, Ackerley Tng wrote:
> >> Sean Christopherson <seanjc@google.com> writes:
> >> >> + KVM: selftests: Test conversion with elevated page refcount
> >> >>     + Askar pointed out that soon vmsplice may not pin pages. Should I
> >> >>       pin pages through CONFIG_GUP_TEST like in [2]? I prefer not to
> >> >>       take a dependency on CONFIG_GUP_TEST.
> >> >
> >> > I'm not exactly excited about taking a dependency on CONFIG_GUP_TEST either, but
> >> > it probably is the least awful choice.  E.g. KVM also pins pages in certain flows,
> >> > but we're _also_ actively working to remove the need to pin.
> >> >
> >> > Hmm, maybe IORING_REGISTER_PBUF_RING?  AFAICT, it's almost literally a "pin user
> >> > memory" syscall.
> >> >
> >>
> >> Hmm that takes a dependency on io_uring, which isn't always compiled
> >> in. Between CONFIG_IO_URING and CONFIG_GUP_TEST, I'd rather
> >> CONFIG_GUP_TEST.
> >
> > Or try both?  If it's not a ridiculous amount of work.
> 
> CONFIG_GUP_TEST was tried in [1]
> 
> [1] https://lore.kernel.org/all/baa8838f623102931e755cf34c86314b305af49c.1747264138.git.ackerleytng@google.com/
> 
> It looks like this
> 
>   static void pin_pages(void *vaddr, uint64_t size)
>   {
>   	const struct pin_longterm_test args = {
>   		.addr = (uint64_t)vaddr,
>   		.size = size,
>   		.flags = PIN_LONGTERM_TEST_FLAG_USE_WRITE,
>   	};
> 
>   	gup_test_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
>   	TEST_REQUIRE(gup_test_fd > 0);

Use __open_path_or_exit().  I also think it makes sense to make these available
to all KVM selftests, there are probably other testcases that could utilize page
pinning.

>   	TEST_ASSERT_EQ(ioctl(gup_test_fd, PIN_LONGTERM_TEST_START, &args), 0);
>   }
> 
>   static void unpin_pages(void)
>   {
>   	TEST_ASSERT_EQ(ioctl(gup_test_fd, PIN_LONGTERM_TEST_STOP), 0);
>   }
> 
> So in the test I'll call pin_pages(), then try to convert, see that it
> fails with EAGAIN and reports the expected error_offset, then I call
> unpin_pages(), then I convert again and expect success.
> 
> Are you uncomfortable with the CONFIG_GUP_TEST interface?

No, my concern is/was the potential for leaking pages if the test fails/crashes,
but it looks like gup_test_release() ensures all pins are dropped when the file is
released, so that should be a non-issue.

> What would you like me to try with CONFIG_IO_URING? I'm thinking that the
> main difference between the two is just down to which non-default CONFIG
> option we want to take for guest_memfd tests.

^ permalink raw reply

* Re: [PATCH v3] arm64: errata: Workaround NVIDIA Olympus device store/load ordering erratum
From: Vladimir Murzin @ 2026-06-11 15:08 UTC (permalink / raw)
  To: Will Deacon, Shanker Donthineni
  Cc: Catalin Marinas, Jason Gunthorpe, linux-arm-kernel, Mark Rutland,
	linux-kernel, linux-doc, Vikram Sethi, Jason Sequeira
In-Reply-To: <aiq5VigmtZq9GlAm@willie-the-truck>

Hi,

On 6/11/26 14:34, Will Deacon wrote:
> On Wed, Jun 10, 2026 at 11:48:22AM -0500, Shanker Donthineni wrote:
>> On systems with NVIDIA Olympus cores, a Device-nGnR* load can be
>> observed by a peripheral before an older, non-overlapping Device-nGnR*
>> store to the same peripheral. This breaks the program-order guarantee
>> that software expects for Device-nGnR* accesses and can leave a
>> peripheral in an incorrect state, as a load is observed before an
>> earlier store takes effect.
>>
>> The erratum can occur only when all of the following apply:
>>
>>   - A PE executes a Device-nGnR* store followed by a younger
>>     Device-nGnR* load.
>>   - The store is not a store-release.
>>   - The accesses target the same peripheral and do not overlap in bytes.
>>   - There is at most one intervening Device-nGnR* store in program
>>     order, and there are no intervening Device-nGnR* loads.
>>   - There is no DSB, and no DMB that orders loads, between the store and
>>     the load.
>>   - Specific micro-architectural and timing conditions occur.
>>
>> Promote the raw MMIO store helpers (__raw_writeb/w/l/q) from plain str*
>> to stlr* (Store-Release), which removes the "store is not a
>> store-release" condition for every device write the kernel issues.
>> Because writel() and writel_relaxed() are both built on __raw_writel()
>> in asm-generic/io.h, patching the raw variants covers both the
>> non-relaxed and relaxed APIs without touching the higher layers. Note
>> that writel()'s own barrier sits before the store, so it does not order
>> the store against a subsequent readl(); the store-release promotion is
>> what provides that ordering.
>>
>> Like ARM64_ERRATUM_832075 on the load side, the change is gated on a new
>> ARM64_WORKAROUND_DEVICE_STORE_RELEASE capability and only activated on
>> parts that match MIDR_NVIDIA_OLYMPUS, so unaffected CPUs continue to use
>> the plain str* sequence.
>>
>> Note: stlr* only supports base-register addressing, so affected CPUs use
>> a base-register stlr* path. Unaffected CPUs keep the original
>> offset-addressed str* sequence introduced by commit d044d6ba6f02
>> ("arm64: io: permit offset addressing").
>>
>> The __const_memcpy_toio_aligned32() and __const_memcpy_toio_aligned64()
>> helpers are left unchanged. These helpers are intended for
>> write-combining mappings, which are Normal-NC on arm64. Replacing their
>> contiguous str* groups would defeat the write-combining behavior used to
>> improve store performance.
>>
>> Co-developed-by: Vikram Sethi <vsethi@nvidia.com>
>> Signed-off-by: Vikram Sethi <vsethi@nvidia.com>
>> Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
>> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
>> ---
>> Changes since v2:
>>   - Reworked the raw MMIO write helpers so unaffected CPUs keep the
>>     existing offset-addressed STR sequence, while affected CPUs use the
>>     base-register STLR path.
>>   - Updated the commit message to match the code changes.
>>   - Rebased on top of the arm64 for-next/errata branch:
>>     https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git/log/?h=for-next/errata
>>
>> Changes since v1:
>>   - Updated the commit message based on feedback from Vladimir Murzin.
>>
>>  Documentation/arch/arm64/silicon-errata.rst |  2 ++
>>  arch/arm64/Kconfig                          | 23 ++++++++++++++++
>>  arch/arm64/include/asm/io.h                 | 30 +++++++++++++++++++++
>>  arch/arm64/kernel/cpu_errata.c              |  8 ++++++
>>  arch/arm64/tools/cpucaps                    |  1 +
>>  5 files changed, 64 insertions(+)
>>
>> diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
>> index ad09bbb10da80..fc45125dc2f80 100644
>> --- a/Documentation/arch/arm64/silicon-errata.rst
>> +++ b/Documentation/arch/arm64/silicon-errata.rst
>> @@ -298,6 +298,8 @@ stable kernels.
>>  +----------------+-----------------+-----------------+-----------------------------+
>>  | NVIDIA         | Carmel Core     | N/A             | NVIDIA_CARMEL_CNP_ERRATUM   |
>>  +----------------+-----------------+-----------------+-----------------------------+
>> +| NVIDIA         | Olympus core    | T410-OLY-1027   | NVIDIA_OLYMPUS_1027_ERRATUM |
>> ++----------------+-----------------+-----------------+-----------------------------+
>>  | NVIDIA         | Olympus core    | T410-OLY-1029   | ARM64_ERRATUM_4118414       |
>>  +----------------+-----------------+-----------------+-----------------------------+
>>  | NVIDIA         | T241 GICv3/4.x  | T241-FABRIC-4   | N/A                         |
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index c65cef81be86a..d633eb70de1ac 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -564,6 +564,29 @@ config ARM64_ERRATUM_832075
>>  
>>  	  If unsure, say Y.
>>  
>> +config NVIDIA_OLYMPUS_1027_ERRATUM
>> +	bool "NVIDIA Olympus: device store/load ordering erratum"
>> +	default y
>> +	help
>> +	  This option adds an alternative code sequence to work around an
>> +	  NVIDIA Olympus core erratum where a Device-nGnR* store can be
>> +	  observed by a peripheral after a younger Device-nGnR* load to the
>> +	  same peripheral. This breaks the program order that drivers rely
>> +	  on for MMIO and can leave a device in an incorrect state.
>> +
>> +	  The workaround promotes the raw MMIO store helpers
>> +	  (__raw_writeb/w/l/q) to Store-Release (STLR), which restores the
>> +	  required ordering. Because writel() and writel_relaxed() are built
>> +	  on __raw_writel(), both are covered without changes to the higher
>> +	  layers.
>> +
>> +	  The fix is applied through the alternatives framework, so enabling
>> +	  this option does not by itself activate the workaround: it is
>> +	  patched in only when an affected CPU is detected, and is a no-op on
>> +	  unaffected CPUs.
>> +
>> +	  If unsure, say Y.
>> +
>>  config ARM64_ERRATUM_834220
>>  	bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault (rare)"
>>  	depends on KVM
>> diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
>> index 8cbd1e96fd50b..801223e754c90 100644
>> --- a/arch/arm64/include/asm/io.h
>> +++ b/arch/arm64/include/asm/io.h
>> @@ -22,10 +22,22 @@
>>  /*
>>   * Generic IO read/write.  These perform native-endian accesses.
>>   */
>> +static __always_inline bool arm64_needs_device_store_release(void)
>> +{
>> +	return alternative_has_cap_unlikely(
>> +				ARM64_WORKAROUND_DEVICE_STORE_RELEASE);
>> +}
>> +
>>  #define __raw_writeb __raw_writeb
>>  static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr)
>>  {
>>  	volatile u8 __iomem *ptr = addr;
>> +
>> +	if (arm64_needs_device_store_release()) {
>> +		asm volatile("stlrb %w0, [%1]" : : "rZ" (val), "r" (addr));
>> +		return;
>> +	}
>> +
>>  	asm volatile("strb %w0, %1" : : "rZ" (val), "Qo" (*ptr));
>>  }
> Use an 'else' clause instead of the early return? (similarly for the other
> changes).

Perhaps I'm missing something, but it is not clear to me why all that
complexity is required.

IIUC, benefits coming with d044d6ba6f02 ("arm64: io: permit offset
addressing") are from better code generation, so we:
 - save code
 - open opportunity for  write-combining

d044d6ba6f02 ("arm64: io: permit offset addressing") comes with simple
benchmark to measure effect of code generation:

| void writeq_zero_8_times(void *ptr)
| {
|        writeq_relaxed(0, ptr + 8 * 0);
|        writeq_relaxed(0, ptr + 8 * 1);
|        writeq_relaxed(0, ptr + 8 * 2);
|        writeq_relaxed(0, ptr + 8 * 3);
|        writeq_relaxed(0, ptr + 8 * 4);
|        writeq_relaxed(0, ptr + 8 * 5);
|        writeq_relaxed(0, ptr + 8 * 6);
|        writeq_relaxed(0, ptr + 8 * 7);
| }

which compiles to

| <writeq_zero_8_times>:
|        str     xzr, [x0]
|        str     xzr, [x0, #8]
|        str     xzr, [x0, #16]
|        str     xzr, [x0, #24]
|        str     xzr, [x0, #32]
|        str     xzr, [x0, #40]
|        str     xzr, [x0, #48]
|        str     xzr, [x0, #56]


v1/v2 compiles to

| <writeq_zero_8_times>:
|        str     xzr, [x0]
|        add     x1, x0, #0x8
|        str     xzr, [x1]
|        add     x1, x0, #0x10
|        str     xzr, [x1]
|        add     x1, x0, #0x18
|        str     xzr, [x1]
|        add     x1, x0, #0x20
|        str     xzr, [x1]
|        add     x1, x0, #0x28
|        str     xzr, [x1]
|        add     x1, x0, #0x30
|        str     xzr, [x1]
|        add     x0, x0, #0x38
|        str     xzr, [x0]

where alternatives are swapping str with stlr. In other words, we are
rolling back to the pre-d044d6ba6f02 implementation.

v3 compiles to:

| <writeq_zero_8_times>:
|        nop
|        str     xzr, [x0]
|        add     x1, x0, #0x8
|        nop
|        str     xzr, [x1]
|        add     x1, x0, #0x10
|        nop
|        str     xzr, [x1]
|        add     x1, x0, #0x18
|        nop
|        str     xzr, [x1]
|        add     x1, x0, #0x20
|        nop
|        str     xzr, [x1]
|        add     x1, x0, #0x28
|        nop
|        str     xzr, [x1]
|        add     x1, x0, #0x30
|        nop
|        str     xzr, [x1]
|        add     x0, x0, #0x38
|        nop
|        str     xzr, [x0]
|        ret

where the static branch swaps the nop with a branch to stlr and back to add.

So it looks to me that we're losing an opportunity for write
combining, but in terms of code size, v1/v2 seems to be the lesser of
two evils.

Cheers
Vladimir

> 
> I still reckon you should do something with the memcpy-to-io routines.
> A simple option could be to make dgh() a dmb on parts with the erratum?
> That at least moves the barrier out of the loop.
> 
> Will
> 


^ permalink raw reply

* [PATCH v3] hwmon: add a driver for the temp/voltage sensor on PolarFire SoC
From: Conor Dooley @ 2026-06-11 15:00 UTC (permalink / raw)
  To: linux-hwmon
  Cc: conor, Lars Randers, Conor Dooley, Guenter Roeck, Jonathan Corbet,
	Shuah Khan, Daire McNamara, linux-doc, linux-kernel, linux-riscv,
	Valentina.FernandezAlanis

From: Lars Randers <lranders@mail.dk>

Add a driver for the temperature and voltage sensors on PolarFire SoC.
The temperature reports how hot the die is, and the voltages are the
SoC's 1.05, 1.8 and 2.5 volt rails respectively.

The hardware supports alarms in theory, but there is an erratum that
prevents clearing them once triggered, so no support is added for them.

The hardware measures voltage with 16 bits, of which 1 is a sign bit and
the remainder holds the voltage as a fixed point integer value. It's
improbable that the hardware will work if the voltages are negative, so
the driver ignores the sign bits.

There's no dt support etc here because this is the child of a simple-mfd
syscon.

Signed-off-by: Lars Randers <lranders@mail.dk>
Co-developed-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
---
v3:
- clamp interval values
- note that 0-8 ms is the range for update_interval
- return -ENODATA for invalid data
- do write bounds checking once
- comment on MMIO regmap return value checks

v2:
- Fix some minor things pointed out by Sashiko including inaccurate
  comments, bounds checking of values read from sysfs and Kconfig
  dependencies.
- Make update_interval use milliseconds instead of microseconds
  (I'll add update_interval_us support when that lands, there's a
  proposed workaround for the erratum circulating internally, so it'll
  probably come alongside alarm support).

CC: Guenter Roeck <linux@roeck-us.net>
CC: Jonathan Corbet <corbet@lwn.net>
CC: Shuah Khan <skhan@linuxfoundation.org>
CC: Conor Dooley <conor.dooley@microchip.com>
CC: Daire McNamara <daire.mcnamara@microchip.com>
CC: linux-hwmon@vger.kernel.org
CC: linux-doc@vger.kernel.org
CC: linux-kernel@vger.kernel.org
CC: linux-riscv@lists.infradead.org
CC: Valentina.FernandezAlanis@microchip.com
---
 Documentation/hwmon/index.rst    |   1 +
 Documentation/hwmon/tvs-mpfs.rst |  53 +++++
 MAINTAINERS                      |   1 +
 drivers/hwmon/Kconfig            |  13 ++
 drivers/hwmon/Makefile           |   1 +
 drivers/hwmon/tvs-mpfs.c         | 388 +++++++++++++++++++++++++++++++
 6 files changed, 457 insertions(+)
 create mode 100644 Documentation/hwmon/tvs-mpfs.rst
 create mode 100644 drivers/hwmon/tvs-mpfs.c

diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 8b655e5d6b68..84a5339e1d6f 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -262,6 +262,7 @@ Hardware Monitoring Kernel Drivers
    tps53679
    tps546d24
    tsc1641
+   tvs-mpfs
    twl4030-madc-hwmon
    ucd9000
    ucd9200
diff --git a/Documentation/hwmon/tvs-mpfs.rst b/Documentation/hwmon/tvs-mpfs.rst
new file mode 100644
index 000000000000..1035812f363a
--- /dev/null
+++ b/Documentation/hwmon/tvs-mpfs.rst
@@ -0,0 +1,53 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver tvs-mpfs
+======================
+
+Supported chips:
+
+  * PolarFire SoC
+
+Authors:
+
+   - Conor Dooley <conor.dooley@microchip.com>
+   - Lars Randers <lranders@mail.dk>
+
+Description
+-----------
+
+This driver implements support for the temperature and voltage sensors on
+PolarFire SoC. The temperature reports how hot the die is, and the voltages are
+the SoC's 1.05, 1.8 and 2.5 volt rails respectively.
+
+
+Usage Notes
+-----------
+
+update_interval has a permitted range of 0 to 8 milliseconds.
+
+Temperatures are read in millidegrees Celsius, but the hardware measures in
+degrees Kelvin, storing the result as 11.4 fixed point data, for a maximum
+value of 2047.9375 degrees Kelvin.
+
+Voltages are read in millivolts. The hardware measures in millivolts, storing
+the value as 12.3 fixed point data, for a maximum of 4095.875 millivolts.
+The minimum value reportable by the driver is 0 volts, although the hardware
+is capable of measuring negative values.
+
+Sysfs entries
+-------------
+
+The following attributes are supported. update_interval is read-write, as are
+the enables. All other attributes are read only.
+
+======================= ====================================================
+temp1_label		Fixed name for channel.
+temp1_input		Measured temperature for channel.
+temp1_enable		Enable/disable for channel.
+
+in[0-2]_label		Fixed name for channel.
+in[0-2]_input		Measured voltage for channel.
+in[0-2]_enable		Enable/disable for channel.
+
+update_interval		The interval at which the chip will update readings.
+======================= ====================================================
diff --git a/MAINTAINERS b/MAINTAINERS
index 2fb1c75afd16..a492cf5ad0fc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22938,6 +22938,7 @@ F:	drivers/char/hw_random/mpfs-rng.c
 F:	drivers/clk/microchip/clk-mpfs*.c
 F:	drivers/firmware/microchip/mpfs-auto-update.c
 F:	drivers/gpio/gpio-mpfs.c
+F:	drivers/hwmon/tvs-mpfs.c
 F:	drivers/i2c/busses/i2c-microchip-corei2c.c
 F:	drivers/mailbox/mailbox-mpfs.c
 F:	drivers/pci/controller/plda/pcie-microchip-host.c
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 14e4cea48acc..2b9622b1db95 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -930,6 +930,19 @@ config SENSORS_JC42
 	  This driver can also be built as a module. If so, the module
 	  will be called jc42.
 
+config SENSORS_POLARFIRE_SOC_TVS
+	tristate "PolarFire SoC (MPFS) temperature and voltage sensor"
+	depends on POLARFIRE_SOC_SYSCONS || COMPILE_TEST
+	depends on MFD_SYSCON
+	help
+	  This driver adds support for the PolarFire SoC (MPFS) Temperature and
+	  Voltage Sensor.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called tvs-mpfs.
+
+	  If unsure, say N.
+
 config SENSORS_POWERZ
 	tristate "ChargerLAB POWER-Z USB-C tester"
 	depends on USB
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 4788996aa137..b58d249e4cf4 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -194,6 +194,7 @@ obj-$(CONFIG_SENSORS_NZXT_SMART2) += nzxt-smart2.o
 obj-$(CONFIG_SENSORS_PC87360)	+= pc87360.o
 obj-$(CONFIG_SENSORS_PC87427)	+= pc87427.o
 obj-$(CONFIG_SENSORS_PCF8591)	+= pcf8591.o
+obj-$(CONFIG_SENSORS_POLARFIRE_SOC_TVS)  += tvs-mpfs.o
 obj-$(CONFIG_SENSORS_POWERZ)	+= powerz.o
 obj-$(CONFIG_SENSORS_POWR1220)  += powr1220.o
 obj-$(CONFIG_SENSORS_PT5161L)	+= pt5161l.o
diff --git a/drivers/hwmon/tvs-mpfs.c b/drivers/hwmon/tvs-mpfs.c
new file mode 100644
index 000000000000..48de8d98b344
--- /dev/null
+++ b/drivers/hwmon/tvs-mpfs.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Author: Lars Randers <lranders@mail.dk>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/err.h>
+#include <linux/freezer.h>
+#include <linux/hwmon.h>
+#include <linux/io.h>
+#include <linux/kthread.h>
+#include <linux/mfd/syscon.h>
+#include <linux/minmax.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#define MPFS_TVS_CTRL 0x08
+#define MPFS_TVS_OUTPUT0 0x24
+#define MPFS_TVS_OUTPUT1 0x28
+
+#define MPFS_TVS_CTRL_TEMP_VALID	BIT(19)
+#define MPFS_TVS_CTRL_V2P5_VALID	BIT(18)
+#define MPFS_TVS_CTRL_V1P8_VALID	BIT(17)
+#define MPFS_TVS_CTRL_V1P05_VALID	BIT(16)
+
+#define MPFS_TVS_CTRL_TEMP_ENABLE	BIT(3)
+#define MPFS_TVS_CTRL_V2P5_ENABLE	BIT(2)
+#define MPFS_TVS_CTRL_V1P8_ENABLE	BIT(1)
+#define MPFS_TVS_CTRL_V1P05_ENABLE	BIT(0)
+#define MPFS_TVS_CTRL_ENABLE_ALL	GENMASK(3, 0)
+
+/*
+ * For all of these the value in millivolts is stored in 16 bits, with an upper
+ * sign bit and a lower 3 bits of fraction. These masks discard the sign bit and
+ * the fractional bits: if Linux is running these voltages cannot be negative,
+ * so dropping the sign bit avoids having to handle two's complement values.
+ */
+#define MPFS_OUTPUT0_V1P8_MASK	GENMASK(30, 19)
+#define MPFS_OUTPUT0_V1P05_MASK	GENMASK(14, 3)
+#define MPFS_OUTPUT1_V2P5_MASK	GENMASK(14, 3)
+
+/*
+ * The register map claims that the temperature is stored in bits 31:16, but
+ * application note "AN4682: PolarFire FPGA Temperature and Voltage Sensor"
+ * says that 31 is reserved. Temperature is in kelvin, so what's probably a
+ * sign bit has no value anyway.
+ */
+#define MPFS_OUTPUT1_TEMP_MASK GENMASK(30, 16)
+
+#define MPFS_TVS_INTERVAL_MASK GENMASK(15, 8)
+#define MPFS_TVS_INTERVAL_OFFSET 8
+/* The interval register is in increments of 32 us */
+#define MPFS_TVS_INTERVAL_SCALE 32
+/* with 254 usable increments of 32 us available, 8 ms is the integer limit */
+#define MPFS_TVS_INTERVAL_MAX_MS 8U
+
+/* 273.1875 in 11.4 fixed-point notation */
+#define MPFS_TVS_K_TO_C 0x1113
+
+enum mpfs_tvs_sensors {
+	SENSOR_V1P05 = 0,
+	SENSOR_V1P8,
+	SENSOR_V2P5,
+};
+
+static const char * const mpfs_tvs_voltage_labels[] = { "1P05", "1P8", "2P5" };
+
+struct mpfs_tvs {
+	struct regmap *regmap;
+};
+
+static int mpfs_tvs_voltage_read(struct mpfs_tvs *data, u32 attr,
+				 int channel, long *val)
+{
+	u32 tmp, control;
+
+	if (attr != hwmon_in_input && attr != hwmon_in_enable)
+		return -EOPNOTSUPP;
+
+	regmap_read(data->regmap, MPFS_TVS_CTRL, &control);
+
+	switch (channel) {
+	case SENSOR_V2P5:
+		if (attr == hwmon_in_enable) {
+			*val = FIELD_GET(MPFS_TVS_CTRL_V2P5_ENABLE, control);
+			break;
+		}
+
+		if (!(control & MPFS_TVS_CTRL_V2P5_VALID))
+			return -ENODATA;
+
+		regmap_read(data->regmap, MPFS_TVS_OUTPUT1, &tmp);
+		*val = FIELD_GET(MPFS_OUTPUT1_V2P5_MASK, tmp);
+		break;
+	case SENSOR_V1P8:
+		if (attr == hwmon_in_enable) {
+			*val = FIELD_GET(MPFS_TVS_CTRL_V1P8_ENABLE, control);
+			break;
+		}
+
+		if (!(control & MPFS_TVS_CTRL_V1P8_VALID))
+			return -ENODATA;
+
+		regmap_read(data->regmap, MPFS_TVS_OUTPUT0, &tmp);
+		*val = FIELD_GET(MPFS_OUTPUT0_V1P8_MASK, tmp);
+		break;
+	case SENSOR_V1P05:
+		if (attr == hwmon_in_enable) {
+			*val = FIELD_GET(MPFS_TVS_CTRL_V1P05_ENABLE, control);
+			break;
+		}
+
+		if (!(control & MPFS_TVS_CTRL_V1P05_VALID))
+			return -ENODATA;
+
+		regmap_read(data->regmap, MPFS_TVS_OUTPUT0, &tmp);
+		*val = FIELD_GET(MPFS_OUTPUT0_V1P05_MASK, tmp);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int mpfs_tvs_voltage_write(struct mpfs_tvs *data, u32 attr,
+				  int channel, long val)
+{
+	u32 tmp;
+
+	if (attr != hwmon_in_enable)
+		return -EOPNOTSUPP;
+
+	if (val > 1 || val < 0)
+		return -EINVAL;
+
+	switch (channel) {
+	case SENSOR_V2P5:
+		tmp = FIELD_PREP(MPFS_TVS_CTRL_V2P5_ENABLE, val);
+		regmap_update_bits(data->regmap, MPFS_TVS_CTRL,
+				   MPFS_TVS_CTRL_V2P5_ENABLE, tmp);
+		break;
+	case SENSOR_V1P8:
+		tmp = FIELD_PREP(MPFS_TVS_CTRL_V1P8_ENABLE, val);
+		regmap_update_bits(data->regmap, MPFS_TVS_CTRL,
+				   MPFS_TVS_CTRL_V1P8_ENABLE, tmp);
+		break;
+	case SENSOR_V1P05:
+		tmp = FIELD_PREP(MPFS_TVS_CTRL_V1P05_ENABLE, val);
+		regmap_update_bits(data->regmap, MPFS_TVS_CTRL,
+				   MPFS_TVS_CTRL_V1P05_ENABLE, tmp);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int mpfs_tvs_temp_read(struct mpfs_tvs *data, u32 attr, long *val)
+{
+	u32 tmp, control;
+
+	if (attr != hwmon_temp_input && attr != hwmon_temp_enable)
+		return -EOPNOTSUPP;
+
+	regmap_read(data->regmap, MPFS_TVS_CTRL, &control);
+
+	if (attr == hwmon_temp_enable) {
+		*val = FIELD_GET(MPFS_TVS_CTRL_TEMP_ENABLE, control);
+		return 0;
+	}
+
+	if (!(control & MPFS_TVS_CTRL_TEMP_VALID))
+		return -ENODATA;
+
+	regmap_read(data->regmap, MPFS_TVS_OUTPUT1, &tmp);
+	*val = FIELD_GET(MPFS_OUTPUT1_TEMP_MASK, tmp);
+	*val -= MPFS_TVS_K_TO_C;
+	*val = (1000 * *val) >> 4; /* fixed point (11.4) to millidegrees */
+
+	return 0;
+}
+
+static int mpfs_tvs_temp_write(struct mpfs_tvs *data, u32 attr, long val)
+{
+	u32 tmp;
+
+	if (attr != hwmon_temp_enable)
+		return -EOPNOTSUPP;
+
+	if (val > 1 || val < 0)
+		return -EINVAL;
+
+	tmp = FIELD_PREP(MPFS_TVS_CTRL_TEMP_ENABLE, val);
+	regmap_update_bits(data->regmap, MPFS_TVS_CTRL,
+			   MPFS_TVS_CTRL_TEMP_ENABLE, tmp);
+
+	return 0;
+}
+
+static int mpfs_tvs_interval_read(struct mpfs_tvs *data, u32 attr, long *val)
+{
+	u32 tmp;
+
+	if (attr != hwmon_chip_update_interval)
+		return -EOPNOTSUPP;
+
+	regmap_read(data->regmap, MPFS_TVS_CTRL, &tmp);
+	*val = FIELD_GET(MPFS_TVS_INTERVAL_MASK, tmp);
+	*val *= MPFS_TVS_INTERVAL_SCALE;
+	*val /= 1000;
+
+	return 0;
+}
+
+static int mpfs_tvs_interval_write(struct mpfs_tvs *data, u32 attr, long val)
+{
+	unsigned long temp = val;
+
+	if (attr != hwmon_chip_update_interval)
+		return -EOPNOTSUPP;
+
+	temp = clamp(temp, 0U, MPFS_TVS_INTERVAL_MAX_MS);
+
+	temp *= 1000;
+	temp /= MPFS_TVS_INTERVAL_SCALE;
+
+	temp <<= MPFS_TVS_INTERVAL_OFFSET;
+	regmap_update_bits(data->regmap, MPFS_TVS_CTRL,
+			   MPFS_TVS_INTERVAL_MASK, temp);
+
+	return 0;
+}
+
+static umode_t mpfs_tvs_is_visible(const void *data,
+				   enum hwmon_sensor_types type,
+				   u32 attr, int channel)
+{
+	if (type == hwmon_chip && attr == hwmon_chip_update_interval)
+		return 0644;
+
+	if (type == hwmon_temp) {
+		switch (attr) {
+		case hwmon_temp_enable:
+			return 0644;
+		case hwmon_temp_input:
+		case hwmon_temp_label:
+			return 0444;
+		default:
+			return 0;
+		}
+	}
+
+	if (type == hwmon_in) {
+		switch (attr) {
+		case hwmon_in_enable:
+			return 0644;
+		case hwmon_in_input:
+		case hwmon_in_label:
+			return 0444;
+		default:
+			return 0;
+		}
+	}
+
+	return 0;
+}
+
+static int mpfs_tvs_read(struct device *dev, enum hwmon_sensor_types type,
+			 u32 attr, int channel, long *val)
+{
+	struct mpfs_tvs *data = dev_get_drvdata(dev);
+
+	switch (type) {
+	case hwmon_temp:
+		return mpfs_tvs_temp_read(data, attr, val);
+	case hwmon_in:
+		return mpfs_tvs_voltage_read(data, attr, channel, val);
+	case hwmon_chip:
+		return mpfs_tvs_interval_read(data, attr, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int mpfs_tvs_write(struct device *dev, enum hwmon_sensor_types type,
+			  u32 attr, int channel, long val)
+{
+	struct mpfs_tvs *data = dev_get_drvdata(dev);
+
+	switch (type) {
+	case hwmon_temp:
+		return mpfs_tvs_temp_write(data, attr, val);
+	case hwmon_in:
+		return mpfs_tvs_voltage_write(data, attr, channel, val);
+	case hwmon_chip:
+		return mpfs_tvs_interval_write(data, attr, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int mpfs_tvs_read_labels(struct device *dev,
+				enum hwmon_sensor_types type,
+				u32 attr, int channel,
+				const char **str)
+{
+	switch (type) {
+	case hwmon_temp:
+		*str = "Die Temp";
+		return 0;
+	case hwmon_in:
+		*str = mpfs_tvs_voltage_labels[channel];
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct hwmon_ops mpfs_tvs_ops = {
+	.is_visible = mpfs_tvs_is_visible,
+	.read_string = mpfs_tvs_read_labels,
+	.read = mpfs_tvs_read,
+	.write = mpfs_tvs_write,
+};
+
+static const struct hwmon_channel_info *mpfs_tvs_info[] = {
+	HWMON_CHANNEL_INFO(chip,
+			   HWMON_C_REGISTER_TZ | HWMON_C_UPDATE_INTERVAL),
+	HWMON_CHANNEL_INFO(temp,
+			   HWMON_T_INPUT | HWMON_T_LABEL | HWMON_T_ENABLE),
+	HWMON_CHANNEL_INFO(in,
+			   HWMON_I_INPUT | HWMON_I_LABEL | HWMON_I_ENABLE,
+			   HWMON_I_INPUT | HWMON_I_LABEL | HWMON_I_ENABLE,
+			   HWMON_I_INPUT | HWMON_I_LABEL | HWMON_I_ENABLE),
+	NULL
+};
+
+static const struct hwmon_chip_info mpfs_tvs_chip_info = {
+	.ops = &mpfs_tvs_ops,
+	.info = mpfs_tvs_info,
+};
+
+static int mpfs_tvs_probe(struct platform_device *pdev)
+{
+	struct device *hwmon_dev;
+	struct mpfs_tvs *data;
+
+	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->regmap = device_node_to_regmap(pdev->dev.parent->of_node);
+	if (IS_ERR(data->regmap))
+		return dev_err_probe(&pdev->dev, PTR_ERR(data->regmap),
+				     "Failed to find syscon regmap\n");
+
+	/*
+	 * It's an MMIO regmap with no resources, there's nothing that can fail
+	 * and return an error
+	 */
+	regmap_write(data->regmap, MPFS_TVS_CTRL, MPFS_TVS_CTRL_ENABLE_ALL);
+
+	hwmon_dev = devm_hwmon_device_register_with_info(&pdev->dev, "mpfs_tvs",
+							 data,
+							 &mpfs_tvs_chip_info,
+							 NULL);
+	if (IS_ERR(hwmon_dev))
+		return dev_err_probe(&pdev->dev, PTR_ERR(hwmon_dev),
+				     "hwmon device registration failed.\n");
+
+	return 0;
+}
+
+static struct platform_driver mpfs_tvs_driver = {
+	.probe = mpfs_tvs_probe,
+	.driver = {
+		.name = "mpfs-tvs",
+	},
+};
+module_platform_driver(mpfs_tvs_driver);
+
+MODULE_AUTHOR("Lars Randers <lranders@mail.dk>");
+MODULE_DESCRIPTION("PolarFire SoC temperature & voltage sensor driver");
+MODULE_LICENSE("GPL");
-- 
2.53.0


^ permalink raw reply related

* Re: [PATCH v3 04/12] x86,fs/resctrl: Program PLZA through kmode arch hooks
From: Babu Moger @ 2026-06-11 14:46 UTC (permalink / raw)
  To: Peter Newman, Moger, Babu
  Cc: Luck, Tony, corbet, reinette.chatre, Dave.Martin, james.morse,
	tglx, bp, dave.hansen, skhan, x86, mingo, hpa, akpm, rdunlap,
	pawan.kumar.gupta, feng.tang, dapeng1.mi, kees, elver, lirongqing,
	paulmck, bhelgaas, seanjc, alexandre.chartre, yazen.ghannam,
	peterz, chang.seok.bae, kim.phillips, xin, naveen,
	thomas.lendacky, linux-doc, linux-kernel, eranian,
	sos-linux-ext-patches
In-Reply-To: <CALPaoCj=UTSvk42n3+OY8LZ_mrEmDpsNXJ53oJd5t+9QVXA6Uw@mail.gmail.com>

Hi Peter,


On 6/11/26 06:44, Peter Newman wrote:
> Hi Babu,
> 
> On Thu, May 21, 2026 at 1:09 AM Moger, Babu <bmoger@amd.com> wrote:
>>
>> Hi Tony,
>>
>> On 5/20/2026 5:16 PM, Luck, Tony wrote:
>>> On Wed, May 20, 2026 at 12:49:25PM -0500, Babu Moger wrote:
>>>> Hi Tony,
>>>>
>>>>
>>>> On 5/19/26 15:59, Luck, Tony wrote:
>>>>> On Thu, Apr 30, 2026 at 06:24:49PM -0500, Babu Moger wrote:
>>>>>> +void resctrl_arch_configure_kmode(cpumask_var_t cpu_mask, u32 closid, u32 rmid, bool enable)
>>>>>> +{
>>>>>> +  union msr_pqr_plza_assoc plza = { 0 };
>>>>>> +
>>>>>> +  plza.split.rmid = rmid;
>>>>>> +  plza.split.rmid_en = 1;
>>>>>
>>>>> Shouldn't there be a parameter for the value of rmid_en?
>>>>
>>>>
>>>> I realized that behavior is not required—it was actually due to a mistake in
>>>> my v2 series implementation.
> 
> Really? This is in fact the only behavior we wanted:
> 
> https://lore.kernel.org/lkml/CABPqkBSq=cgn-am4qorA_VN0vsbpbfDePSi7gubicpROB1=djw@mail.gmail.com/

I have responded to a similar comment already.

https://lore.kernel.org/lkml/1d7c79bf-1e40-4db7-8f66-45f234b6d87e@amd.com/

You are right—we should not set rmid_en = 1 in all cases.

For the "inherit_mon" mode, rmid_en will be 0, so the monitoring counts 
will remain unaffected. This represents the generic use case.

For the "assign_mon" mode, rmid_en will be 1. In this case, the kernel 
monitoring counts will be separate from the user’s.

So, we have both options. I hope this addresses your concerns.

Thanks

Babu

^ permalink raw reply

* Re: [PATCH v5 09/10] dt-bindings: firmware: add arm,ras-cper
From: Ahmed Tiba @ 2026-06-11 14:22 UTC (permalink / raw)
  To: Jonathan Cameron
  Cc: will, xueshuai, saket.dumbre, mchehab, dave, djbw, bp, tony.luck,
	guohanjun, lenb, skhan, vishal.l.verma, rafael, corbet, ira.weiny,
	dave.jiang, krzk+dt, robh, catalin.marinas, alison.schofield,
	conor+dt, linux-arm-kernel, Michael.Zhao2, linux-doc,
	linux-kernel, linux-cxl, Dmitry.Lamerov, devicetree, linux-acpi,
	linux-edac, acpica-devel
In-Reply-To: <20260529174407.7081ad0b@jic23-huawei>

On 29/05/2026 17:44, Jonathan Cameron wrote:
> On Fri, 29 May 2026 10:50:49 +0100
> Ahmed Tiba<ahmed.tiba@arm.com> wrote:
>>   .../devicetree/bindings/firmware/arm,ras-cper.yaml | 54 ++++++++++++++++++++++
>>   MAINTAINERS                                        |  5 ++
>>   2 files changed, 59 insertions(+)
>>
>> diff --git a/Documentation/devicetree/bindings/firmware/arm,ras-cper.yaml b/Documentation/devicetree/bindings/firmware/arm,ras-cper.yaml
>> new file mode 100644
>> index 000000000000..3d4de096093f
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/firmware/arm,ras-cper.yaml
>> @@ -0,0 +1,54 @@
>> +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
>> +%YAML 1.2
>> +---
>> +$id: http://devicetree.org/schemas/firmware/arm,ras-cper.yaml#
>> +$schema: http://devicetree.org/meta-schemas/core.yaml#
>> +
>> +title: Arm RAS CPER provider
>> +
>> +maintainers:
>> +  - Ahmed Tiba<ahmed.tiba@arm.com>
>> +
>> +description:
>> +  Arm Reliability, Availability and Serviceability (RAS) firmware can expose
>> +  a firmware-first CPER error source directly via DeviceTree. Firmware
>> +  provides the CPER Generic Error Status block and notifies the OS through
>> +  an interrupt.
> I'd like some spec references in here if possible.
I can add a reference to the UEFI CPER specification for the Generic
Error Status record format.

For the firmware-first DT description itself I do not have a more 
specific public reference to cite.

>> +
>> +properties:
>> +  compatible:
>> +    const: arm,ras-cper
>> +
>> +  memory-region:
>> +    minItems: 1
>> +    items:
>> +      - description:
>> +          CPER Generic Error Status block exposed by firmware.
>> +      - description:
>> +          Optional firmware-owned ack buffer used on platforms
>> +          where firmware needs an explicit "ack" handshake before overwriting
>> +          the CPER buffer. Firmware watches bit 0 and expects the OS to set it
>> +          once the current status block has been consumed.
> Does the arm spec really make this optional?  Can we constrain it to not be
> just to make our lives easier?  I've never been sure how you would actually
> make a working platform without the ack support.
I will update the binding to require both memory-region entries.

Best regards,
Ahmed




^ permalink raw reply

* Re: [PATCH v3] arm64: errata: Workaround NVIDIA Olympus device store/load ordering erratum
From: Shanker Donthineni @ 2026-06-11 14:08 UTC (permalink / raw)
  To: Will Deacon
  Cc: Catalin Marinas, Vladimir Murzin, Jason Gunthorpe,
	linux-arm-kernel, Mark Rutland, linux-kernel, linux-doc,
	Vikram Sethi, Jason Sequeira
In-Reply-To: <aiq5VigmtZq9GlAm@willie-the-truck>

Hi Will,

On 6/11/2026 8:34 AM, Will Deacon wrote:
> External email: Use caution opening links or attachments
>
>
> On Wed, Jun 10, 2026 at 11:48:22AM -0500, Shanker Donthineni wrote:
>> On systems with NVIDIA Olympus cores, a Device-nGnR* load can be
>> observed by a peripheral before an older, non-overlapping Device-nGnR*
>> store to the same peripheral. This breaks the program-order guarantee
>> that software expects for Device-nGnR* accesses and can leave a
>> peripheral in an incorrect state, as a load is observed before an
>> earlier store takes effect.
>>
>> The erratum can occur only when all of the following apply:
>>
>>    - A PE executes a Device-nGnR* store followed by a younger
>>      Device-nGnR* load.
>>    - The store is not a store-release.
>>    - The accesses target the same peripheral and do not overlap in bytes.
>>    - There is at most one intervening Device-nGnR* store in program
>>      order, and there are no intervening Device-nGnR* loads.
>>    - There is no DSB, and no DMB that orders loads, between the store and
>>      the load.
>>    - Specific micro-architectural and timing conditions occur.
>>
>> Promote the raw MMIO store helpers (__raw_writeb/w/l/q) from plain str*
>> to stlr* (Store-Release), which removes the "store is not a
>> store-release" condition for every device write the kernel issues.
>> Because writel() and writel_relaxed() are both built on __raw_writel()
>> in asm-generic/io.h, patching the raw variants covers both the
>> non-relaxed and relaxed APIs without touching the higher layers. Note
>> that writel()'s own barrier sits before the store, so it does not order
>> the store against a subsequent readl(); the store-release promotion is
>> what provides that ordering.
>>
>> Like ARM64_ERRATUM_832075 on the load side, the change is gated on a new
>> ARM64_WORKAROUND_DEVICE_STORE_RELEASE capability and only activated on
>> parts that match MIDR_NVIDIA_OLYMPUS, so unaffected CPUs continue to use
>> the plain str* sequence.
>>
>> Note: stlr* only supports base-register addressing, so affected CPUs use
>> a base-register stlr* path. Unaffected CPUs keep the original
>> offset-addressed str* sequence introduced by commit d044d6ba6f02
>> ("arm64: io: permit offset addressing").
>>
>> The __const_memcpy_toio_aligned32() and __const_memcpy_toio_aligned64()
>> helpers are left unchanged. These helpers are intended for
>> write-combining mappings, which are Normal-NC on arm64. Replacing their
>> contiguous str* groups would defeat the write-combining behavior used to
>> improve store performance.
>>
>> Co-developed-by: Vikram Sethi <vsethi@nvidia.com>
>> Signed-off-by: Vikram Sethi <vsethi@nvidia.com>
>> Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
>> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
>> ---
>> Changes since v2:
>>    - Reworked the raw MMIO write helpers so unaffected CPUs keep the
>>      existing offset-addressed STR sequence, while affected CPUs use the
>>      base-register STLR path.
>>    - Updated the commit message to match the code changes.
>>    - Rebased on top of the arm64 for-next/errata branch:
>>      https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git/log/?h=for-next/errata
>>
>> Changes since v1:
>>    - Updated the commit message based on feedback from Vladimir Murzin.
>>
>>   Documentation/arch/arm64/silicon-errata.rst |  2 ++
>>   arch/arm64/Kconfig                          | 23 ++++++++++++++++
>>   arch/arm64/include/asm/io.h                 | 30 +++++++++++++++++++++
>>   arch/arm64/kernel/cpu_errata.c              |  8 ++++++
>>   arch/arm64/tools/cpucaps                    |  1 +
>>   5 files changed, 64 insertions(+)
>>
>> diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
>> index ad09bbb10da80..fc45125dc2f80 100644
>> --- a/Documentation/arch/arm64/silicon-errata.rst
>> +++ b/Documentation/arch/arm64/silicon-errata.rst
>> @@ -298,6 +298,8 @@ stable kernels.
>>   +----------------+-----------------+-----------------+-----------------------------+
>>   | NVIDIA         | Carmel Core     | N/A             | NVIDIA_CARMEL_CNP_ERRATUM   |
>>   +----------------+-----------------+-----------------+-----------------------------+
>> +| NVIDIA         | Olympus core    | T410-OLY-1027   | NVIDIA_OLYMPUS_1027_ERRATUM |
>> ++----------------+-----------------+-----------------+-----------------------------+
>>   | NVIDIA         | Olympus core    | T410-OLY-1029   | ARM64_ERRATUM_4118414       |
>>   +----------------+-----------------+-----------------+-----------------------------+
>>   | NVIDIA         | T241 GICv3/4.x  | T241-FABRIC-4   | N/A                         |
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index c65cef81be86a..d633eb70de1ac 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -564,6 +564,29 @@ config ARM64_ERRATUM_832075
>>
>>          If unsure, say Y.
>>
>> +config NVIDIA_OLYMPUS_1027_ERRATUM
>> +     bool "NVIDIA Olympus: device store/load ordering erratum"
>> +     default y
>> +     help
>> +       This option adds an alternative code sequence to work around an
>> +       NVIDIA Olympus core erratum where a Device-nGnR* store can be
>> +       observed by a peripheral after a younger Device-nGnR* load to the
>> +       same peripheral. This breaks the program order that drivers rely
>> +       on for MMIO and can leave a device in an incorrect state.
>> +
>> +       The workaround promotes the raw MMIO store helpers
>> +       (__raw_writeb/w/l/q) to Store-Release (STLR), which restores the
>> +       required ordering. Because writel() and writel_relaxed() are built
>> +       on __raw_writel(), both are covered without changes to the higher
>> +       layers.
>> +
>> +       The fix is applied through the alternatives framework, so enabling
>> +       this option does not by itself activate the workaround: it is
>> +       patched in only when an affected CPU is detected, and is a no-op on
>> +       unaffected CPUs.
>> +
>> +       If unsure, say Y.
>> +
>>   config ARM64_ERRATUM_834220
>>        bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault (rare)"
>>        depends on KVM
>> diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
>> index 8cbd1e96fd50b..801223e754c90 100644
>> --- a/arch/arm64/include/asm/io.h
>> +++ b/arch/arm64/include/asm/io.h
>> @@ -22,10 +22,22 @@
>>   /*
>>    * Generic IO read/write.  These perform native-endian accesses.
>>    */
>> +static __always_inline bool arm64_needs_device_store_release(void)
>> +{
>> +     return alternative_has_cap_unlikely(
>> +                             ARM64_WORKAROUND_DEVICE_STORE_RELEASE);
>> +}
>> +
>>   #define __raw_writeb __raw_writeb
>>   static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr)
>>   {
>>        volatile u8 __iomem *ptr = addr;
>> +
>> +     if (arm64_needs_device_store_release()) {
>> +             asm volatile("stlrb %w0, [%1]" : : "rZ" (val), "r" (addr));
>> +             return;
>> +     }
>> +
>>        asm volatile("strb %w0, %1" : : "rZ" (val), "Qo" (*ptr));
>>   }
> Use an 'else' clause instead of the early return? (similarly for the other
> changes).

I agree. I’ll rework the raw write helpers to use an explicit if/else
form instead of returning early from the STLR path.

>
> I still reckon you should do something with the memcpy-to-io routines.
> A simple option could be to make dgh() a dmb on parts with the erratum?
> That at least moves the barrier out of the loop.

For the memcpy-to-IO routines, would it be acceptable to address the erratum by
patching dgh() to a DMB OSH on affected CPUs, as shown below? I’ll also sync
with the Olympus CPU hardware team to confirm this approach for the v4 patch.

#define dgh()		asm volatile(ALTERNATIVE("hint #6", "dmb osh",	\
					ARM64_WORKAROUND_DEVICE_STORE_RELEASE) \
				     : : : "memory")

This keeps the existing memcpy-to-IO store sequences unchanged while placing the
ordering barrier outside the copy loop as you suggested.

-Shanker


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox