public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/4] KCOV fixes
@ 2024-06-11  7:50 Dmitry Vyukov
  2024-06-11  7:50 ` [PATCH v2 1/4] x86/entry: Remove unwanted instrumentation in common_interrupt() Dmitry Vyukov
                   ` (4 more replies)
  0 siblings, 5 replies; 26+ messages in thread
From: Dmitry Vyukov @ 2024-06-11  7:50 UTC (permalink / raw)
  To: tglx, mingo, bp, dave.hansen, x86
  Cc: linux-kernel, syzkaller, elver, glider, nogikh, tarasmadan,
	Dmitry Vyukov

Fix spurious KCOV coverage from interrupts and add a test.
Ignore some additional files that lead to large amounts
of uninteresting coverage.
As a reference point, tracing a simple open system call
produces ~10K PCs with these changes instead of ~45K PCs.

Dmitry Vyukov (4):
  x86/entry: Remove unwanted instrumentation in common_interrupt()
  kcov: add interrupt handling self test
  module: Fix KCOV-ignored file name
  x86: Ignore stack unwinding in KCOV

 arch/x86/include/asm/hardirq.h  |  8 ++++++--
 arch/x86/include/asm/idtentry.h |  6 +++---
 arch/x86/kernel/Makefile        |  8 ++++++++
 kernel/kcov.c                   | 31 +++++++++++++++++++++++++++++++
 kernel/module/Makefile          |  2 +-
 lib/Kconfig.debug               |  8 ++++++++
 6 files changed, 57 insertions(+), 6 deletions(-)


base-commit: 83a7eefedc9b56fe7bfeff13b6c7356688ffa670
-- 
2.45.2.505.gda0bf45e8d-goog


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH v2 1/4] x86/entry: Remove unwanted instrumentation in common_interrupt()
  2024-06-11  7:50 [PATCH v2 0/4] KCOV fixes Dmitry Vyukov
@ 2024-06-11  7:50 ` Dmitry Vyukov
  2024-06-19 11:19   ` Peter Zijlstra
  2024-08-08 15:49   ` [tip: x86/build] " tip-bot2 for Dmitry Vyukov
  2024-06-11  7:50 ` [PATCH v2 2/4] kcov: add interrupt handling self test Dmitry Vyukov
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 26+ messages in thread
From: Dmitry Vyukov @ 2024-06-11  7:50 UTC (permalink / raw)
  To: tglx, mingo, bp, dave.hansen, x86
  Cc: linux-kernel, syzkaller, elver, glider, nogikh, tarasmadan,
	Dmitry Vyukov

common_interrupt() and friends call kvm_set_cpu_l1tf_flush_l1d(),
which is not marked as noinstr nor __always_inline.
So compiler outlines it and adds instrumentation to it.
Since the call is inside of instrumentation_begin/end(),
objtool does not warn about it.

The manifestation is that KCOV produces spurious coverage
in kvm_set_cpu_l1tf_flush_l1d() in random places because
the call happens when preempt count is not yet updated
to say that we are in an interrupt.

Mark kvm_set_cpu_l1tf_flush_l1d() as __always_inline and move
out of instrumentation_begin/end() section.
It only calls __this_cpu_write() which is already safe to call
in noinstr contexts.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Fixes: 6368558c3710 ("x86/entry: Provide IDTENTRY_SYSVEC")
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: syzkaller@googlegroups.com
---
 arch/x86/include/asm/hardirq.h  | 8 ++++++--
 arch/x86/include/asm/idtentry.h | 6 +++---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index c67fa6ad098a..6ffa8b75f4cd 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -69,7 +69,11 @@ extern u64 arch_irq_stat(void);
 #define local_softirq_pending_ref       pcpu_hot.softirq_pending
 
 #if IS_ENABLED(CONFIG_KVM_INTEL)
-static inline void kvm_set_cpu_l1tf_flush_l1d(void)
+/*
+ * This function is called from noinstr interrupt contexts
+ * and must be inlined to not get instrumentation.
+ */
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
 {
 	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
 }
@@ -84,7 +88,7 @@ static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
 	return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
 }
 #else /* !IS_ENABLED(CONFIG_KVM_INTEL) */
-static inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
 #endif /* IS_ENABLED(CONFIG_KVM_INTEL) */
 
 #endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index d4f24499b256..ad5c68f0509d 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -212,8 +212,8 @@ __visible noinstr void func(struct pt_regs *regs,			\
 	irqentry_state_t state = irqentry_enter(regs);			\
 	u32 vector = (u32)(u8)error_code;				\
 									\
+	kvm_set_cpu_l1tf_flush_l1d();                                   \
 	instrumentation_begin();					\
-	kvm_set_cpu_l1tf_flush_l1d();					\
 	run_irq_on_irqstack_cond(__##func, regs, vector);		\
 	instrumentation_end();						\
 	irqentry_exit(regs, state);					\
@@ -250,7 +250,6 @@ static void __##func(struct pt_regs *regs);				\
 									\
 static __always_inline void instr_##func(struct pt_regs *regs)		\
 {									\
-	kvm_set_cpu_l1tf_flush_l1d();					\
 	run_sysvec_on_irqstack_cond(__##func, regs);			\
 }									\
 									\
@@ -258,6 +257,7 @@ __visible noinstr void func(struct pt_regs *regs)			\
 {									\
 	irqentry_state_t state = irqentry_enter(regs);			\
 									\
+	kvm_set_cpu_l1tf_flush_l1d();                                   \
 	instrumentation_begin();					\
 	instr_##func (regs);						\
 	instrumentation_end();						\
@@ -288,7 +288,6 @@ static __always_inline void __##func(struct pt_regs *regs);		\
 static __always_inline void instr_##func(struct pt_regs *regs)		\
 {									\
 	__irq_enter_raw();						\
-	kvm_set_cpu_l1tf_flush_l1d();					\
 	__##func (regs);						\
 	__irq_exit_raw();						\
 }									\
@@ -297,6 +296,7 @@ __visible noinstr void func(struct pt_regs *regs)			\
 {									\
 	irqentry_state_t state = irqentry_enter(regs);			\
 									\
+	kvm_set_cpu_l1tf_flush_l1d();                                   \
 	instrumentation_begin();					\
 	instr_##func (regs);						\
 	instrumentation_end();						\
-- 
2.45.2.505.gda0bf45e8d-goog


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH v2 2/4] kcov: add interrupt handling self test
  2024-06-11  7:50 [PATCH v2 0/4] KCOV fixes Dmitry Vyukov
  2024-06-11  7:50 ` [PATCH v2 1/4] x86/entry: Remove unwanted instrumentation in common_interrupt() Dmitry Vyukov
@ 2024-06-11  7:50 ` Dmitry Vyukov
  2024-06-11  9:29   ` Marco Elver
                     ` (3 more replies)
  2024-06-11  7:50 ` [PATCH v2 3/4] module: Fix KCOV-ignored file name Dmitry Vyukov
                   ` (2 subsequent siblings)
  4 siblings, 4 replies; 26+ messages in thread
From: Dmitry Vyukov @ 2024-06-11  7:50 UTC (permalink / raw)
  To: tglx, mingo, bp, dave.hansen, x86
  Cc: linux-kernel, syzkaller, elver, glider, nogikh, tarasmadan,
	Dmitry Vyukov

Add a boot self test that can catch sprious coverage from interrupts.
The coverage callback filters out interrupt code, but only after the
handler updates preempt count. Some code periodically leaks out
of that section and leads to spurious coverage.
Add a best-effort (but simple) test that is likely to catch such bugs.
If the test is enabled on CI systems that use KCOV, they should catch
any issues fast.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: syzkaller@googlegroups.com

---

Changed since v1:
 - renamed KCOV_TEST to KCOV_SELFTEST
 - improved the config description
 - loop for exactly 300ms in the test

In my local testing w/o the previous fix,
it immidiatly produced the following splat:

kcov: running selftest
BUG: TASK stack guard page was hit at ffffc90000147ff8
Oops: stack guard page: 0000 [#1] PREEMPT SMP KASAN PTI
...
 kvm_set_cpu_l1tf_flush_l1d+0x5/0x20
 sysvec_call_function+0x15/0xb0
 asm_sysvec_call_function+0x1a/0x20
 kcov_init+0xe4/0x130
 do_one_initcall+0xbc/0x470
 kernel_init_freeable+0x4fc/0x930
 kernel_init+0x1c/0x2b0
---
 kernel/kcov.c     | 31 +++++++++++++++++++++++++++++++
 lib/Kconfig.debug |  8 ++++++++
 2 files changed, 39 insertions(+)

diff --git a/kernel/kcov.c b/kernel/kcov.c
index c3124f6d5536..72a5bf55107f 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -11,6 +11,7 @@
 #include <linux/fs.h>
 #include <linux/hashtable.h>
 #include <linux/init.h>
+#include <linux/jiffies.h>
 #include <linux/kmsan-checks.h>
 #include <linux/mm.h>
 #include <linux/preempt.h>
@@ -1057,6 +1058,32 @@ u64 kcov_common_handle(void)
 }
 EXPORT_SYMBOL(kcov_common_handle);
 
+#ifdef CONFIG_KCOV_SELFTEST
+static void __init selftest(void)
+{
+	unsigned long start;
+
+	pr_err("running self test\n");
+	/*
+	 * Test that interrupts don't produce spurious coverage.
+	 * The coverage callback filters out interrupt code, but only
+	 * after the handler updates preempt count. Some code periodically
+	 * leaks out of that section and leads to spurious coverage.
+	 * It's hard to call the actual interrupt handler directly,
+	 * so we just loop here for a bit waiting for a timer interrupt.
+	 * We set kcov_mode to enable tracing, but don't setup the area,
+	 * so any attempt to trace will crash. Note: we must not call any
+	 * potentially traced functions in this region.
+	 */
+	start = jiffies;
+	current->kcov_mode = KCOV_MODE_TRACE_PC;
+	while ((jiffies - start) * MSEC_PER_SEC / HZ < 300)
+		;
+	current->kcov_mode = 0;
+	pr_err("done running self test\n");
+}
+#endif
+
 static int __init kcov_init(void)
 {
 	int cpu;
@@ -1076,6 +1103,10 @@ static int __init kcov_init(void)
 	 */
 	debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops);
 
+#ifdef CONFIG_KCOV_SELFTEST
+	selftest();
+#endif
+
 	return 0;
 }
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 59b6765d86b8..695a437a52d9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2171,6 +2171,14 @@ config KCOV_IRQ_AREA_SIZE
 	  soft interrupts. This specifies the size of those areas in the
 	  number of unsigned long words.
 
+config KCOV_SELFTEST
+	bool "Perform short selftests on boot"
+	depends on KCOV
+	help
+	  Run short KCOV coverage collection selftests on boot.
+	  On test failure, causes the kernel to panic. Recommended to be
+	  enabled, ensuring critical functionality works as intended.
+
 menuconfig RUNTIME_TESTING_MENU
 	bool "Runtime Testing"
 	default y
-- 
2.45.2.505.gda0bf45e8d-goog


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH v2 3/4] module: Fix KCOV-ignored file name
  2024-06-11  7:50 [PATCH v2 0/4] KCOV fixes Dmitry Vyukov
  2024-06-11  7:50 ` [PATCH v2 1/4] x86/entry: Remove unwanted instrumentation in common_interrupt() Dmitry Vyukov
  2024-06-11  7:50 ` [PATCH v2 2/4] kcov: add interrupt handling self test Dmitry Vyukov
@ 2024-06-11  7:50 ` Dmitry Vyukov
  2024-06-11  9:29   ` Marco Elver
                     ` (2 more replies)
  2024-06-11  7:50 ` [PATCH v2 4/4] x86: Ignore stack unwinding in KCOV Dmitry Vyukov
  2024-06-11  9:31 ` [PATCH v2 0/4] KCOV fixes Dmitry Vyukov
  4 siblings, 3 replies; 26+ messages in thread
From: Dmitry Vyukov @ 2024-06-11  7:50 UTC (permalink / raw)
  To: tglx, mingo, bp, dave.hansen, x86
  Cc: linux-kernel, syzkaller, elver, glider, nogikh, tarasmadan,
	Dmitry Vyukov, Aaron Tomlin

Module.c was renamed to main.c, but the Makefile directive
was copy-pasted verbatim with the old file name.
Fix up the file name.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Fixes: cfc1d277891e ("module: Move all into module/")
Cc: Aaron Tomlin <atomlin@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: syzkaller@googlegroups.com
---
 kernel/module/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/module/Makefile b/kernel/module/Makefile
index a10b2b9a6fdf..50ffcc413b54 100644
--- a/kernel/module/Makefile
+++ b/kernel/module/Makefile
@@ -5,7 +5,7 @@
 
 # These are called from save_stack_trace() on slub debug path,
 # and produce insane amounts of uninteresting coverage.
-KCOV_INSTRUMENT_module.o := n
+KCOV_INSTRUMENT_main.o := n
 
 obj-y += main.o
 obj-y += strict_rwx.o
-- 
2.45.2.505.gda0bf45e8d-goog


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH v2 4/4] x86: Ignore stack unwinding in KCOV
  2024-06-11  7:50 [PATCH v2 0/4] KCOV fixes Dmitry Vyukov
                   ` (2 preceding siblings ...)
  2024-06-11  7:50 ` [PATCH v2 3/4] module: Fix KCOV-ignored file name Dmitry Vyukov
@ 2024-06-11  7:50 ` Dmitry Vyukov
  2024-06-13 22:51   ` Andrey Konovalov
                     ` (2 more replies)
  2024-06-11  9:31 ` [PATCH v2 0/4] KCOV fixes Dmitry Vyukov
  4 siblings, 3 replies; 26+ messages in thread
From: Dmitry Vyukov @ 2024-06-11  7:50 UTC (permalink / raw)
  To: tglx, mingo, bp, dave.hansen, x86
  Cc: linux-kernel, syzkaller, elver, glider, nogikh, tarasmadan,
	Dmitry Vyukov

Stack unwinding produces large amounts of uninteresting coverage.
It's called from KASAN kmalloc/kfree hooks, fault injection, etc.
It's not particularly useful and is not a function of system call args.
Ignore that code.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: syzkaller@googlegroups.com
---
 arch/x86/kernel/Makefile | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 20a0dd51700a..cd49ebfae984 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -39,6 +39,14 @@ KMSAN_SANITIZE_sev.o					:= n
 # first second.
 KCOV_INSTRUMENT_head$(BITS).o				:= n
 KCOV_INSTRUMENT_sev.o					:= n
+# These are called from save_stack_trace() on debug paths,
+# and produce large amounts of uninteresting coverage.
+KCOV_INSTRUMENT_stacktrace.o				:= n
+KCOV_INSTRUMENT_dumpstack.o				:= n
+KCOV_INSTRUMENT_dumpstack_$(BITS).o			:= n
+KCOV_INSTRUMENT_unwind_orc.o				:= n
+KCOV_INSTRUMENT_unwind_frame.o				:= n
+KCOV_INSTRUMENT_unwind_guess.o				:= n
 
 CFLAGS_irq.o := -I $(src)/../include/asm/trace
 
-- 
2.45.2.505.gda0bf45e8d-goog


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 2/4] kcov: add interrupt handling self test
  2024-06-11  7:50 ` [PATCH v2 2/4] kcov: add interrupt handling self test Dmitry Vyukov
@ 2024-06-11  9:29   ` Marco Elver
  2024-06-13 23:01   ` Andrey Konovalov
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 26+ messages in thread
From: Marco Elver @ 2024-06-11  9:29 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: tglx, mingo, bp, dave.hansen, x86, linux-kernel, syzkaller,
	glider, nogikh, tarasmadan

On Tue, 11 Jun 2024 at 09:50, Dmitry Vyukov <dvyukov@google.com> wrote:
>
> Add a boot self test that can catch sprious coverage from interrupts.
> The coverage callback filters out interrupt code, but only after the
> handler updates preempt count. Some code periodically leaks out
> of that section and leads to spurious coverage.
> Add a best-effort (but simple) test that is likely to catch such bugs.
> If the test is enabled on CI systems that use KCOV, they should catch
> any issues fast.
>
> Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> Reviewed-by: Alexander Potapenko <glider@google.com>
> Cc: x86@kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: syzkaller@googlegroups.com

Reviewed-by: Marco Elver <elver@google.com>

> ---
>
> Changed since v1:
>  - renamed KCOV_TEST to KCOV_SELFTEST
>  - improved the config description
>  - loop for exactly 300ms in the test
>
> In my local testing w/o the previous fix,
> it immidiatly produced the following splat:
>
> kcov: running selftest
> BUG: TASK stack guard page was hit at ffffc90000147ff8
> Oops: stack guard page: 0000 [#1] PREEMPT SMP KASAN PTI
> ...
>  kvm_set_cpu_l1tf_flush_l1d+0x5/0x20
>  sysvec_call_function+0x15/0xb0
>  asm_sysvec_call_function+0x1a/0x20
>  kcov_init+0xe4/0x130
>  do_one_initcall+0xbc/0x470
>  kernel_init_freeable+0x4fc/0x930
>  kernel_init+0x1c/0x2b0
> ---
>  kernel/kcov.c     | 31 +++++++++++++++++++++++++++++++
>  lib/Kconfig.debug |  8 ++++++++
>  2 files changed, 39 insertions(+)
>
> diff --git a/kernel/kcov.c b/kernel/kcov.c
> index c3124f6d5536..72a5bf55107f 100644
> --- a/kernel/kcov.c
> +++ b/kernel/kcov.c
> @@ -11,6 +11,7 @@
>  #include <linux/fs.h>
>  #include <linux/hashtable.h>
>  #include <linux/init.h>
> +#include <linux/jiffies.h>
>  #include <linux/kmsan-checks.h>
>  #include <linux/mm.h>
>  #include <linux/preempt.h>
> @@ -1057,6 +1058,32 @@ u64 kcov_common_handle(void)
>  }
>  EXPORT_SYMBOL(kcov_common_handle);
>
> +#ifdef CONFIG_KCOV_SELFTEST
> +static void __init selftest(void)
> +{
> +       unsigned long start;
> +
> +       pr_err("running self test\n");
> +       /*
> +        * Test that interrupts don't produce spurious coverage.
> +        * The coverage callback filters out interrupt code, but only
> +        * after the handler updates preempt count. Some code periodically
> +        * leaks out of that section and leads to spurious coverage.
> +        * It's hard to call the actual interrupt handler directly,
> +        * so we just loop here for a bit waiting for a timer interrupt.
> +        * We set kcov_mode to enable tracing, but don't setup the area,
> +        * so any attempt to trace will crash. Note: we must not call any
> +        * potentially traced functions in this region.
> +        */
> +       start = jiffies;
> +       current->kcov_mode = KCOV_MODE_TRACE_PC;
> +       while ((jiffies - start) * MSEC_PER_SEC / HZ < 300)
> +               ;
> +       current->kcov_mode = 0;
> +       pr_err("done running self test\n");
> +}
> +#endif
> +
>  static int __init kcov_init(void)
>  {
>         int cpu;
> @@ -1076,6 +1103,10 @@ static int __init kcov_init(void)
>          */
>         debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops);
>
> +#ifdef CONFIG_KCOV_SELFTEST
> +       selftest();
> +#endif
> +
>         return 0;
>  }
>
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 59b6765d86b8..695a437a52d9 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -2171,6 +2171,14 @@ config KCOV_IRQ_AREA_SIZE
>           soft interrupts. This specifies the size of those areas in the
>           number of unsigned long words.
>
> +config KCOV_SELFTEST
> +       bool "Perform short selftests on boot"
> +       depends on KCOV
> +       help
> +         Run short KCOV coverage collection selftests on boot.
> +         On test failure, causes the kernel to panic. Recommended to be
> +         enabled, ensuring critical functionality works as intended.
> +
>  menuconfig RUNTIME_TESTING_MENU
>         bool "Runtime Testing"
>         default y
> --
> 2.45.2.505.gda0bf45e8d-goog
>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] module: Fix KCOV-ignored file name
  2024-06-11  7:50 ` [PATCH v2 3/4] module: Fix KCOV-ignored file name Dmitry Vyukov
@ 2024-06-11  9:29   ` Marco Elver
  2024-06-13 22:55   ` Andrey Konovalov
  2024-08-08 15:49   ` [tip: x86/build] " tip-bot2 for Dmitry Vyukov
  2 siblings, 0 replies; 26+ messages in thread
From: Marco Elver @ 2024-06-11  9:29 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: tglx, mingo, bp, dave.hansen, x86, linux-kernel, syzkaller,
	glider, nogikh, tarasmadan, Aaron Tomlin

On Tue, 11 Jun 2024 at 09:50, Dmitry Vyukov <dvyukov@google.com> wrote:
>
> Module.c was renamed to main.c, but the Makefile directive
> was copy-pasted verbatim with the old file name.
> Fix up the file name.
>
> Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> Reviewed-by: Alexander Potapenko <glider@google.com>
> Fixes: cfc1d277891e ("module: Move all into module/")
> Cc: Aaron Tomlin <atomlin@redhat.com>
> Cc: x86@kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: syzkaller@googlegroups.com

Reviewed-by: Marco Elver <elver@google.com>

> ---
>  kernel/module/Makefile | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/kernel/module/Makefile b/kernel/module/Makefile
> index a10b2b9a6fdf..50ffcc413b54 100644
> --- a/kernel/module/Makefile
> +++ b/kernel/module/Makefile
> @@ -5,7 +5,7 @@
>
>  # These are called from save_stack_trace() on slub debug path,
>  # and produce insane amounts of uninteresting coverage.
> -KCOV_INSTRUMENT_module.o := n
> +KCOV_INSTRUMENT_main.o := n
>
>  obj-y += main.o
>  obj-y += strict_rwx.o
> --
> 2.45.2.505.gda0bf45e8d-goog
>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 0/4] KCOV fixes
  2024-06-11  7:50 [PATCH v2 0/4] KCOV fixes Dmitry Vyukov
                   ` (3 preceding siblings ...)
  2024-06-11  7:50 ` [PATCH v2 4/4] x86: Ignore stack unwinding in KCOV Dmitry Vyukov
@ 2024-06-11  9:31 ` Dmitry Vyukov
  2024-06-19  5:20   ` Dmitry Vyukov
  4 siblings, 1 reply; 26+ messages in thread
From: Dmitry Vyukov @ 2024-06-11  9:31 UTC (permalink / raw)
  To: tglx, mingo, bp, dave.hansen, x86
  Cc: linux-kernel, syzkaller, elver, glider, nogikh, tarasmadan

On Tue, 11 Jun 2024 at 09:50, Dmitry Vyukov <dvyukov@google.com> wrote:
>
> Fix spurious KCOV coverage from interrupts and add a test.
> Ignore some additional files that lead to large amounts
> of uninteresting coverage.
> As a reference point, tracing a simple open system call
> produces ~10K PCs with these changes instead of ~45K PCs.
>
> Dmitry Vyukov (4):
>   x86/entry: Remove unwanted instrumentation in common_interrupt()
>   kcov: add interrupt handling self test
>   module: Fix KCOV-ignored file name
>   x86: Ignore stack unwinding in KCOV
>
>  arch/x86/include/asm/hardirq.h  |  8 ++++++--
>  arch/x86/include/asm/idtentry.h |  6 +++---
>  arch/x86/kernel/Makefile        |  8 ++++++++
>  kernel/kcov.c                   | 31 +++++++++++++++++++++++++++++++
>  kernel/module/Makefile          |  2 +-
>  lib/Kconfig.debug               |  8 ++++++++
>  6 files changed, 57 insertions(+), 6 deletions(-)
>
>
> base-commit: 83a7eefedc9b56fe7bfeff13b6c7356688ffa670
> --
> 2.45.2.505.gda0bf45e8d-goog

Thomas, Ingo, Borislav, Dave,

Can you take this via x86 tree please?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 4/4] x86: Ignore stack unwinding in KCOV
  2024-06-11  7:50 ` [PATCH v2 4/4] x86: Ignore stack unwinding in KCOV Dmitry Vyukov
@ 2024-06-13 22:51   ` Andrey Konovalov
  2024-06-19 11:23   ` Peter Zijlstra
  2024-08-08 15:49   ` [tip: x86/build] " tip-bot2 for Dmitry Vyukov
  2 siblings, 0 replies; 26+ messages in thread
From: Andrey Konovalov @ 2024-06-13 22:51 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: tglx, mingo, bp, dave.hansen, x86, linux-kernel, syzkaller, elver,
	glider, nogikh, tarasmadan

On Tue, Jun 11, 2024 at 9:50 AM 'Dmitry Vyukov' via syzkaller
<syzkaller@googlegroups.com> wrote:
>
> Stack unwinding produces large amounts of uninteresting coverage.
> It's called from KASAN kmalloc/kfree hooks, fault injection, etc.
> It's not particularly useful and is not a function of system call args.
> Ignore that code.
>
> Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> Reviewed-by: Alexander Potapenko <glider@google.com>
> Reviewed-by: Marco Elver <elver@google.com>
> Cc: x86@kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: syzkaller@googlegroups.com
> ---
>  arch/x86/kernel/Makefile | 8 ++++++++
>  1 file changed, 8 insertions(+)
>
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index 20a0dd51700a..cd49ebfae984 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -39,6 +39,14 @@ KMSAN_SANITIZE_sev.o                                 := n
>  # first second.
>  KCOV_INSTRUMENT_head$(BITS).o                          := n
>  KCOV_INSTRUMENT_sev.o                                  := n
> +# These are called from save_stack_trace() on debug paths,
> +# and produce large amounts of uninteresting coverage.
> +KCOV_INSTRUMENT_stacktrace.o                           := n
> +KCOV_INSTRUMENT_dumpstack.o                            := n
> +KCOV_INSTRUMENT_dumpstack_$(BITS).o                    := n
> +KCOV_INSTRUMENT_unwind_orc.o                           := n
> +KCOV_INSTRUMENT_unwind_frame.o                         := n
> +KCOV_INSTRUMENT_unwind_guess.o                         := n
>
>  CFLAGS_irq.o := -I $(src)/../include/asm/trace
>
> --

Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] module: Fix KCOV-ignored file name
  2024-06-11  7:50 ` [PATCH v2 3/4] module: Fix KCOV-ignored file name Dmitry Vyukov
  2024-06-11  9:29   ` Marco Elver
@ 2024-06-13 22:55   ` Andrey Konovalov
  2024-08-08 15:49   ` [tip: x86/build] " tip-bot2 for Dmitry Vyukov
  2 siblings, 0 replies; 26+ messages in thread
From: Andrey Konovalov @ 2024-06-13 22:55 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: tglx, mingo, bp, dave.hansen, x86, linux-kernel, syzkaller, elver,
	glider, nogikh, tarasmadan, Aaron Tomlin

On Tue, Jun 11, 2024 at 9:50 AM 'Dmitry Vyukov' via syzkaller
<syzkaller@googlegroups.com> wrote:
>
> Module.c was renamed to main.c, but the Makefile directive
> was copy-pasted verbatim with the old file name.
> Fix up the file name.
>
> Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> Reviewed-by: Alexander Potapenko <glider@google.com>
> Fixes: cfc1d277891e ("module: Move all into module/")
> Cc: Aaron Tomlin <atomlin@redhat.com>
> Cc: x86@kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: syzkaller@googlegroups.com
> ---
>  kernel/module/Makefile | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/kernel/module/Makefile b/kernel/module/Makefile
> index a10b2b9a6fdf..50ffcc413b54 100644
> --- a/kernel/module/Makefile
> +++ b/kernel/module/Makefile
> @@ -5,7 +5,7 @@
>
>  # These are called from save_stack_trace() on slub debug path,
>  # and produce insane amounts of uninteresting coverage.
> -KCOV_INSTRUMENT_module.o := n
> +KCOV_INSTRUMENT_main.o := n
>
>  obj-y += main.o
>  obj-y += strict_rwx.o
> --
> 2.45.2.505.gda0bf45e8d-goog

Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 2/4] kcov: add interrupt handling self test
  2024-06-11  7:50 ` [PATCH v2 2/4] kcov: add interrupt handling self test Dmitry Vyukov
  2024-06-11  9:29   ` Marco Elver
@ 2024-06-13 23:01   ` Andrey Konovalov
  2024-06-19 11:13   ` Peter Zijlstra
  2024-08-08 15:49   ` [tip: x86/build] kcov: Add " tip-bot2 for Dmitry Vyukov
  3 siblings, 0 replies; 26+ messages in thread
From: Andrey Konovalov @ 2024-06-13 23:01 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: tglx, mingo, bp, dave.hansen, x86, linux-kernel, syzkaller, elver,
	glider, nogikh, tarasmadan

On Tue, Jun 11, 2024 at 9:50 AM 'Dmitry Vyukov' via syzkaller
<syzkaller@googlegroups.com> wrote:
>
> Add a boot self test that can catch sprious coverage from interrupts.
> The coverage callback filters out interrupt code, but only after the
> handler updates preempt count. Some code periodically leaks out
> of that section and leads to spurious coverage.
> Add a best-effort (but simple) test that is likely to catch such bugs.
> If the test is enabled on CI systems that use KCOV, they should catch
> any issues fast.
>
> Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> Reviewed-by: Alexander Potapenko <glider@google.com>
> Cc: x86@kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: syzkaller@googlegroups.com
>
> ---
>
> Changed since v1:
>  - renamed KCOV_TEST to KCOV_SELFTEST
>  - improved the config description
>  - loop for exactly 300ms in the test
>
> In my local testing w/o the previous fix,
> it immidiatly produced the following splat:
>
> kcov: running selftest
> BUG: TASK stack guard page was hit at ffffc90000147ff8
> Oops: stack guard page: 0000 [#1] PREEMPT SMP KASAN PTI
> ...
>  kvm_set_cpu_l1tf_flush_l1d+0x5/0x20
>  sysvec_call_function+0x15/0xb0
>  asm_sysvec_call_function+0x1a/0x20
>  kcov_init+0xe4/0x130
>  do_one_initcall+0xbc/0x470
>  kernel_init_freeable+0x4fc/0x930
>  kernel_init+0x1c/0x2b0
> ---
>  kernel/kcov.c     | 31 +++++++++++++++++++++++++++++++
>  lib/Kconfig.debug |  8 ++++++++
>  2 files changed, 39 insertions(+)
>
> diff --git a/kernel/kcov.c b/kernel/kcov.c
> index c3124f6d5536..72a5bf55107f 100644
> --- a/kernel/kcov.c
> +++ b/kernel/kcov.c
> @@ -11,6 +11,7 @@
>  #include <linux/fs.h>
>  #include <linux/hashtable.h>
>  #include <linux/init.h>
> +#include <linux/jiffies.h>
>  #include <linux/kmsan-checks.h>
>  #include <linux/mm.h>
>  #include <linux/preempt.h>
> @@ -1057,6 +1058,32 @@ u64 kcov_common_handle(void)
>  }
>  EXPORT_SYMBOL(kcov_common_handle);
>
> +#ifdef CONFIG_KCOV_SELFTEST
> +static void __init selftest(void)
> +{
> +       unsigned long start;
> +
> +       pr_err("running self test\n");
> +       /*
> +        * Test that interrupts don't produce spurious coverage.
> +        * The coverage callback filters out interrupt code, but only
> +        * after the handler updates preempt count. Some code periodically
> +        * leaks out of that section and leads to spurious coverage.
> +        * It's hard to call the actual interrupt handler directly,
> +        * so we just loop here for a bit waiting for a timer interrupt.
> +        * We set kcov_mode to enable tracing, but don't setup the area,
> +        * so any attempt to trace will crash. Note: we must not call any
> +        * potentially traced functions in this region.
> +        */
> +       start = jiffies;
> +       current->kcov_mode = KCOV_MODE_TRACE_PC;
> +       while ((jiffies - start) * MSEC_PER_SEC / HZ < 300)
> +               ;
> +       current->kcov_mode = 0;
> +       pr_err("done running self test\n");
> +}
> +#endif
> +
>  static int __init kcov_init(void)
>  {
>         int cpu;
> @@ -1076,6 +1103,10 @@ static int __init kcov_init(void)
>          */
>         debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops);
>
> +#ifdef CONFIG_KCOV_SELFTEST
> +       selftest();
> +#endif
> +
>         return 0;
>  }
>
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 59b6765d86b8..695a437a52d9 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -2171,6 +2171,14 @@ config KCOV_IRQ_AREA_SIZE
>           soft interrupts. This specifies the size of those areas in the
>           number of unsigned long words.
>
> +config KCOV_SELFTEST
> +       bool "Perform short selftests on boot"
> +       depends on KCOV
> +       help
> +         Run short KCOV coverage collection selftests on boot.
> +         On test failure, causes the kernel to panic. Recommended to be

Nit: "causes the kernel to panic" => "causes a kernel panic" or "panic
the kernel"

> +         enabled, ensuring critical functionality works as intended.
> +
>  menuconfig RUNTIME_TESTING_MENU
>         bool "Runtime Testing"
>         default y
> --
> 2.45.2.505.gda0bf45e8d-goog

Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 0/4] KCOV fixes
  2024-06-11  9:31 ` [PATCH v2 0/4] KCOV fixes Dmitry Vyukov
@ 2024-06-19  5:20   ` Dmitry Vyukov
  2024-06-19  8:30     ` Borislav Petkov
  0 siblings, 1 reply; 26+ messages in thread
From: Dmitry Vyukov @ 2024-06-19  5:20 UTC (permalink / raw)
  To: tglx, mingo, bp, dave.hansen, x86
  Cc: linux-kernel, syzkaller, elver, glider, nogikh, tarasmadan

On Tue, 11 Jun 2024 at 11:31, Dmitry Vyukov <dvyukov@google.com> wrote:
> > Fix spurious KCOV coverage from interrupts and add a test.
> > Ignore some additional files that lead to large amounts
> > of uninteresting coverage.
> > As a reference point, tracing a simple open system call
> > produces ~10K PCs with these changes instead of ~45K PCs.
> >
> > Dmitry Vyukov (4):
> >   x86/entry: Remove unwanted instrumentation in common_interrupt()
> >   kcov: add interrupt handling self test
> >   module: Fix KCOV-ignored file name
> >   x86: Ignore stack unwinding in KCOV
> >
> >  arch/x86/include/asm/hardirq.h  |  8 ++++++--
> >  arch/x86/include/asm/idtentry.h |  6 +++---
> >  arch/x86/kernel/Makefile        |  8 ++++++++
> >  kernel/kcov.c                   | 31 +++++++++++++++++++++++++++++++
> >  kernel/module/Makefile          |  2 +-
> >  lib/Kconfig.debug               |  8 ++++++++
> >  6 files changed, 57 insertions(+), 6 deletions(-)
> >
> >
> > base-commit: 83a7eefedc9b56fe7bfeff13b6c7356688ffa670
> > --
> > 2.45.2.505.gda0bf45e8d-goog
>
> Thomas, Ingo, Borislav, Dave,
>
> Can you take this via x86 tree please?

Or is it OK to take this via mm tree (where KCOV changes usually go)?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 0/4] KCOV fixes
  2024-06-19  5:20   ` Dmitry Vyukov
@ 2024-06-19  8:30     ` Borislav Petkov
  2024-08-05 12:52       ` Andrey Konovalov
  0 siblings, 1 reply; 26+ messages in thread
From: Borislav Petkov @ 2024-06-19  8:30 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: tglx, mingo, dave.hansen, x86, linux-kernel, syzkaller, elver,
	glider, nogikh, tarasmadan

On Wed, Jun 19, 2024 at 07:20:56AM +0200, Dmitry Vyukov wrote:
> Or is it OK to take this via mm tree (where KCOV changes usually go)?

Be patient, pls, you're on the TODO list.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 2/4] kcov: add interrupt handling self test
  2024-06-11  7:50 ` [PATCH v2 2/4] kcov: add interrupt handling self test Dmitry Vyukov
  2024-06-11  9:29   ` Marco Elver
  2024-06-13 23:01   ` Andrey Konovalov
@ 2024-06-19 11:13   ` Peter Zijlstra
  2024-06-19 11:18     ` Dmitry Vyukov
  2024-08-08 15:49   ` [tip: x86/build] kcov: Add " tip-bot2 for Dmitry Vyukov
  3 siblings, 1 reply; 26+ messages in thread
From: Peter Zijlstra @ 2024-06-19 11:13 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: tglx, mingo, bp, dave.hansen, x86, linux-kernel, syzkaller, elver,
	glider, nogikh, tarasmadan

On Tue, Jun 11, 2024 at 09:50:31AM +0200, Dmitry Vyukov wrote:
> Add a boot self test that can catch sprious coverage from interrupts.
> The coverage callback filters out interrupt code, but only after the
> handler updates preempt count. Some code periodically leaks out
> of that section and leads to spurious coverage.
> Add a best-effort (but simple) test that is likely to catch such bugs.
> If the test is enabled on CI systems that use KCOV, they should catch
> any issues fast.
> 
> Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> Reviewed-by: Alexander Potapenko <glider@google.com>
> Cc: x86@kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: syzkaller@googlegroups.com
> 
> ---
> 
> Changed since v1:
>  - renamed KCOV_TEST to KCOV_SELFTEST
>  - improved the config description
>  - loop for exactly 300ms in the test
> 
> In my local testing w/o the previous fix,
> it immidiatly produced the following splat:
> 
> kcov: running selftest
> BUG: TASK stack guard page was hit at ffffc90000147ff8
> Oops: stack guard page: 0000 [#1] PREEMPT SMP KASAN PTI
> ...
>  kvm_set_cpu_l1tf_flush_l1d+0x5/0x20
>  sysvec_call_function+0x15/0xb0
>  asm_sysvec_call_function+0x1a/0x20
>  kcov_init+0xe4/0x130
>  do_one_initcall+0xbc/0x470
>  kernel_init_freeable+0x4fc/0x930
>  kernel_init+0x1c/0x2b0

So I'm not entirely sure how the above BUG comes about, nor how this
selftest tickles it. Could you elaborate?
	
I've found check_kcov_mode() which has this !in_task() clause, but I'm
not entirely sure how failing that leads to the above mentioned failure.

> ---
>  kernel/kcov.c     | 31 +++++++++++++++++++++++++++++++
>  lib/Kconfig.debug |  8 ++++++++
>  2 files changed, 39 insertions(+)
> 
> diff --git a/kernel/kcov.c b/kernel/kcov.c
> index c3124f6d5536..72a5bf55107f 100644
> --- a/kernel/kcov.c
> +++ b/kernel/kcov.c
> @@ -11,6 +11,7 @@
>  #include <linux/fs.h>
>  #include <linux/hashtable.h>
>  #include <linux/init.h>
> +#include <linux/jiffies.h>
>  #include <linux/kmsan-checks.h>
>  #include <linux/mm.h>
>  #include <linux/preempt.h>
> @@ -1057,6 +1058,32 @@ u64 kcov_common_handle(void)
>  }
>  EXPORT_SYMBOL(kcov_common_handle);
>  
> +#ifdef CONFIG_KCOV_SELFTEST
> +static void __init selftest(void)
> +{
> +	unsigned long start;
> +
> +	pr_err("running self test\n");
> +	/*
> +	 * Test that interrupts don't produce spurious coverage.
> +	 * The coverage callback filters out interrupt code, but only
> +	 * after the handler updates preempt count. Some code periodically
> +	 * leaks out of that section and leads to spurious coverage.
> +	 * It's hard to call the actual interrupt handler directly,
> +	 * so we just loop here for a bit waiting for a timer interrupt.
> +	 * We set kcov_mode to enable tracing, but don't setup the area,
> +	 * so any attempt to trace will crash. Note: we must not call any
> +	 * potentially traced functions in this region.
> +	 */
> +	start = jiffies;
> +	current->kcov_mode = KCOV_MODE_TRACE_PC;

	barrier();

> +	while ((jiffies - start) * MSEC_PER_SEC / HZ < 300)
> +		;

	barrier();

> +	current->kcov_mode = 0;
> +	pr_err("done running self test\n");
> +}
> +#endif

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 2/4] kcov: add interrupt handling self test
  2024-06-19 11:13   ` Peter Zijlstra
@ 2024-06-19 11:18     ` Dmitry Vyukov
  2024-06-19 11:26       ` Peter Zijlstra
  0 siblings, 1 reply; 26+ messages in thread
From: Dmitry Vyukov @ 2024-06-19 11:18 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: tglx, mingo, bp, dave.hansen, x86, linux-kernel, syzkaller, elver,
	glider, nogikh, tarasmadan

On Wed, 19 Jun 2024 at 13:13, Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Tue, Jun 11, 2024 at 09:50:31AM +0200, Dmitry Vyukov wrote:
> > Add a boot self test that can catch sprious coverage from interrupts.
> > The coverage callback filters out interrupt code, but only after the
> > handler updates preempt count. Some code periodically leaks out
> > of that section and leads to spurious coverage.
> > Add a best-effort (but simple) test that is likely to catch such bugs.
> > If the test is enabled on CI systems that use KCOV, they should catch
> > any issues fast.
> >
> > Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> > Reviewed-by: Alexander Potapenko <glider@google.com>
> > Cc: x86@kernel.org
> > Cc: linux-kernel@vger.kernel.org
> > Cc: syzkaller@googlegroups.com
> >
> > ---
> >
> > Changed since v1:
> >  - renamed KCOV_TEST to KCOV_SELFTEST
> >  - improved the config description
> >  - loop for exactly 300ms in the test
> >
> > In my local testing w/o the previous fix,
> > it immidiatly produced the following splat:
> >
> > kcov: running selftest
> > BUG: TASK stack guard page was hit at ffffc90000147ff8
> > Oops: stack guard page: 0000 [#1] PREEMPT SMP KASAN PTI
> > ...
> >  kvm_set_cpu_l1tf_flush_l1d+0x5/0x20
> >  sysvec_call_function+0x15/0xb0
> >  asm_sysvec_call_function+0x1a/0x20
> >  kcov_init+0xe4/0x130
> >  do_one_initcall+0xbc/0x470
> >  kernel_init_freeable+0x4fc/0x930
> >  kernel_init+0x1c/0x2b0
>
> So I'm not entirely sure how the above BUG comes about, nor how this
> selftest tickles it. Could you elaborate?
>
> I've found check_kcov_mode() which has this !in_task() clause, but I'm
> not entirely sure how failing that leads to the above mentioned failure.

I've tried to explain it in the test comment, maybe I need to improve it:

+        * We set kcov_mode to enable tracing, but don't setup the area,
+        * so any attempt to trace will crash. Note: we must not call any
+        * potentially traced functions in this region.

Basically, we setup current task kcov in a way that any attempt to
trace in __sanitizer_cov_trace_pc() will crash, and then just loop
waiting for interrupts.

A more legit way to achieve the same would be to properly setup kcov
for tracing from within the kernel, then call outermost interrupt
entry function, then check we traced nothing. But that would require a
non-trivial amount of new complex code, and e.g. the top-most
interrupt entry function is not exported and is arch-specific.



> > ---
> >  kernel/kcov.c     | 31 +++++++++++++++++++++++++++++++
> >  lib/Kconfig.debug |  8 ++++++++
> >  2 files changed, 39 insertions(+)
> >
> > diff --git a/kernel/kcov.c b/kernel/kcov.c
> > index c3124f6d5536..72a5bf55107f 100644
> > --- a/kernel/kcov.c
> > +++ b/kernel/kcov.c
> > @@ -11,6 +11,7 @@
> >  #include <linux/fs.h>
> >  #include <linux/hashtable.h>
> >  #include <linux/init.h>
> > +#include <linux/jiffies.h>
> >  #include <linux/kmsan-checks.h>
> >  #include <linux/mm.h>
> >  #include <linux/preempt.h>
> > @@ -1057,6 +1058,32 @@ u64 kcov_common_handle(void)
> >  }
> >  EXPORT_SYMBOL(kcov_common_handle);
> >
> > +#ifdef CONFIG_KCOV_SELFTEST
> > +static void __init selftest(void)
> > +{
> > +     unsigned long start;
> > +
> > +     pr_err("running self test\n");
> > +     /*
> > +      * Test that interrupts don't produce spurious coverage.
> > +      * The coverage callback filters out interrupt code, but only
> > +      * after the handler updates preempt count. Some code periodically
> > +      * leaks out of that section and leads to spurious coverage.
> > +      * It's hard to call the actual interrupt handler directly,
> > +      * so we just loop here for a bit waiting for a timer interrupt.
> > +      * We set kcov_mode to enable tracing, but don't setup the area,
> > +      * so any attempt to trace will crash. Note: we must not call any
> > +      * potentially traced functions in this region.
> > +      */
> > +     start = jiffies;
> > +     current->kcov_mode = KCOV_MODE_TRACE_PC;
>
>         barrier();
>
> > +     while ((jiffies - start) * MSEC_PER_SEC / HZ < 300)
> > +             ;
>
>         barrier();
>
> > +     current->kcov_mode = 0;
> > +     pr_err("done running self test\n");
> > +}
> > +#endif
>
> --
> You received this message because you are subscribed to the Google Groups "syzkaller" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to syzkaller+unsubscribe@googlegroups.com.
> To view this discussion on the web visit https://groups.google.com/d/msgid/syzkaller/20240619111309.GJ31592%40noisy.programming.kicks-ass.net.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 1/4] x86/entry: Remove unwanted instrumentation in common_interrupt()
  2024-06-11  7:50 ` [PATCH v2 1/4] x86/entry: Remove unwanted instrumentation in common_interrupt() Dmitry Vyukov
@ 2024-06-19 11:19   ` Peter Zijlstra
  2024-06-19 13:05     ` Dmitry Vyukov
  2024-08-08 15:49   ` [tip: x86/build] " tip-bot2 for Dmitry Vyukov
  1 sibling, 1 reply; 26+ messages in thread
From: Peter Zijlstra @ 2024-06-19 11:19 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: tglx, mingo, bp, dave.hansen, x86, linux-kernel, syzkaller, elver,
	glider, nogikh, tarasmadan

On Tue, Jun 11, 2024 at 09:50:30AM +0200, Dmitry Vyukov wrote:
> common_interrupt() and friends call kvm_set_cpu_l1tf_flush_l1d(),
> which is not marked as noinstr nor __always_inline.
> So compiler outlines it and adds instrumentation to it.
> Since the call is inside of instrumentation_begin/end(),
> objtool does not warn about it.
> 
> The manifestation is that KCOV produces spurious coverage
> in kvm_set_cpu_l1tf_flush_l1d() in random places because
> the call happens when preempt count is not yet updated
> to say that we are in an interrupt.

So I'm reading spurious here, but the next patch trips BUG, them very
much not the same thing. Which is it?

> Mark kvm_set_cpu_l1tf_flush_l1d() as __always_inline and move
> out of instrumentation_begin/end() section.
> It only calls __this_cpu_write() which is already safe to call
> in noinstr contexts.
> 
> Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> Reviewed-by: Alexander Potapenko <glider@google.com>
> Fixes: 6368558c3710 ("x86/entry: Provide IDTENTRY_SYSVEC")
> Cc: x86@kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: syzkaller@googlegroups.com

Anyway, the patch is fine,

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

> ---
>  arch/x86/include/asm/hardirq.h  | 8 ++++++--
>  arch/x86/include/asm/idtentry.h | 6 +++---
>  2 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
> index c67fa6ad098a..6ffa8b75f4cd 100644
> --- a/arch/x86/include/asm/hardirq.h
> +++ b/arch/x86/include/asm/hardirq.h
> @@ -69,7 +69,11 @@ extern u64 arch_irq_stat(void);
>  #define local_softirq_pending_ref       pcpu_hot.softirq_pending
>  
>  #if IS_ENABLED(CONFIG_KVM_INTEL)
> -static inline void kvm_set_cpu_l1tf_flush_l1d(void)
> +/*
> + * This function is called from noinstr interrupt contexts
> + * and must be inlined to not get instrumentation.
> + */
> +static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
>  {
>  	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
>  }
> @@ -84,7 +88,7 @@ static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
>  	return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
>  }
>  #else /* !IS_ENABLED(CONFIG_KVM_INTEL) */
> -static inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
> +static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
>  #endif /* IS_ENABLED(CONFIG_KVM_INTEL) */
>  
>  #endif /* _ASM_X86_HARDIRQ_H */
> diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
> index d4f24499b256..ad5c68f0509d 100644
> --- a/arch/x86/include/asm/idtentry.h
> +++ b/arch/x86/include/asm/idtentry.h
> @@ -212,8 +212,8 @@ __visible noinstr void func(struct pt_regs *regs,			\
>  	irqentry_state_t state = irqentry_enter(regs);			\
>  	u32 vector = (u32)(u8)error_code;				\
>  									\
> +	kvm_set_cpu_l1tf_flush_l1d();                                   \
>  	instrumentation_begin();					\
> -	kvm_set_cpu_l1tf_flush_l1d();					\
>  	run_irq_on_irqstack_cond(__##func, regs, vector);		\
>  	instrumentation_end();						\
>  	irqentry_exit(regs, state);					\
> @@ -250,7 +250,6 @@ static void __##func(struct pt_regs *regs);				\
>  									\
>  static __always_inline void instr_##func(struct pt_regs *regs)		\
>  {									\
> -	kvm_set_cpu_l1tf_flush_l1d();					\
>  	run_sysvec_on_irqstack_cond(__##func, regs);			\
>  }									\
>  									\
> @@ -258,6 +257,7 @@ __visible noinstr void func(struct pt_regs *regs)			\
>  {									\
>  	irqentry_state_t state = irqentry_enter(regs);			\
>  									\
> +	kvm_set_cpu_l1tf_flush_l1d();                                   \
>  	instrumentation_begin();					\
>  	instr_##func (regs);						\
>  	instrumentation_end();						\
> @@ -288,7 +288,6 @@ static __always_inline void __##func(struct pt_regs *regs);		\
>  static __always_inline void instr_##func(struct pt_regs *regs)		\
>  {									\
>  	__irq_enter_raw();						\
> -	kvm_set_cpu_l1tf_flush_l1d();					\
>  	__##func (regs);						\
>  	__irq_exit_raw();						\
>  }									\
> @@ -297,6 +296,7 @@ __visible noinstr void func(struct pt_regs *regs)			\
>  {									\
>  	irqentry_state_t state = irqentry_enter(regs);			\
>  									\
> +	kvm_set_cpu_l1tf_flush_l1d();                                   \
>  	instrumentation_begin();					\
>  	instr_##func (regs);						\
>  	instrumentation_end();						\
> -- 
> 2.45.2.505.gda0bf45e8d-goog
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 4/4] x86: Ignore stack unwinding in KCOV
  2024-06-11  7:50 ` [PATCH v2 4/4] x86: Ignore stack unwinding in KCOV Dmitry Vyukov
  2024-06-13 22:51   ` Andrey Konovalov
@ 2024-06-19 11:23   ` Peter Zijlstra
  2024-06-19 13:10     ` Dmitry Vyukov
  2024-08-08 15:49   ` [tip: x86/build] " tip-bot2 for Dmitry Vyukov
  2 siblings, 1 reply; 26+ messages in thread
From: Peter Zijlstra @ 2024-06-19 11:23 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: tglx, mingo, bp, dave.hansen, x86, linux-kernel, syzkaller, elver,
	glider, nogikh, tarasmadan

On Tue, Jun 11, 2024 at 09:50:33AM +0200, Dmitry Vyukov wrote:
> Stack unwinding produces large amounts of uninteresting coverage.
> It's called from KASAN kmalloc/kfree hooks, fault injection, etc.
> It's not particularly useful and is not a function of system call args.
> Ignore that code.

This stems from KCOV's purpose being guiding syzkaller as opposed to it
being a more general coverage tool, right?

Is that spelled out anywhere?

Anyway,

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

> Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> Reviewed-by: Alexander Potapenko <glider@google.com>
> Reviewed-by: Marco Elver <elver@google.com>
> Cc: x86@kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: syzkaller@googlegroups.com
> ---
>  arch/x86/kernel/Makefile | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index 20a0dd51700a..cd49ebfae984 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -39,6 +39,14 @@ KMSAN_SANITIZE_sev.o					:= n
>  # first second.
>  KCOV_INSTRUMENT_head$(BITS).o				:= n
>  KCOV_INSTRUMENT_sev.o					:= n
> +# These are called from save_stack_trace() on debug paths,
> +# and produce large amounts of uninteresting coverage.
> +KCOV_INSTRUMENT_stacktrace.o				:= n
> +KCOV_INSTRUMENT_dumpstack.o				:= n
> +KCOV_INSTRUMENT_dumpstack_$(BITS).o			:= n
> +KCOV_INSTRUMENT_unwind_orc.o				:= n
> +KCOV_INSTRUMENT_unwind_frame.o				:= n
> +KCOV_INSTRUMENT_unwind_guess.o				:= n
>  
>  CFLAGS_irq.o := -I $(src)/../include/asm/trace
>  
> -- 
> 2.45.2.505.gda0bf45e8d-goog
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 2/4] kcov: add interrupt handling self test
  2024-06-19 11:18     ` Dmitry Vyukov
@ 2024-06-19 11:26       ` Peter Zijlstra
  0 siblings, 0 replies; 26+ messages in thread
From: Peter Zijlstra @ 2024-06-19 11:26 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: tglx, mingo, bp, dave.hansen, x86, linux-kernel, syzkaller, elver,
	glider, nogikh, tarasmadan

On Wed, Jun 19, 2024 at 01:18:52PM +0200, Dmitry Vyukov wrote:
> On Wed, 19 Jun 2024 at 13:13, Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Tue, Jun 11, 2024 at 09:50:31AM +0200, Dmitry Vyukov wrote:
> > > Add a boot self test that can catch sprious coverage from interrupts.
> > > The coverage callback filters out interrupt code, but only after the
> > > handler updates preempt count. Some code periodically leaks out
> > > of that section and leads to spurious coverage.
> > > Add a best-effort (but simple) test that is likely to catch such bugs.
> > > If the test is enabled on CI systems that use KCOV, they should catch
> > > any issues fast.
> > >
> > > Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> > > Reviewed-by: Alexander Potapenko <glider@google.com>
> > > Cc: x86@kernel.org
> > > Cc: linux-kernel@vger.kernel.org
> > > Cc: syzkaller@googlegroups.com
> > >
> > > ---
> > >
> > > Changed since v1:
> > >  - renamed KCOV_TEST to KCOV_SELFTEST
> > >  - improved the config description
> > >  - loop for exactly 300ms in the test
> > >
> > > In my local testing w/o the previous fix,
> > > it immidiatly produced the following splat:
> > >
> > > kcov: running selftest
> > > BUG: TASK stack guard page was hit at ffffc90000147ff8
> > > Oops: stack guard page: 0000 [#1] PREEMPT SMP KASAN PTI
> > > ...
> > >  kvm_set_cpu_l1tf_flush_l1d+0x5/0x20
> > >  sysvec_call_function+0x15/0xb0
> > >  asm_sysvec_call_function+0x1a/0x20
> > >  kcov_init+0xe4/0x130
> > >  do_one_initcall+0xbc/0x470
> > >  kernel_init_freeable+0x4fc/0x930
> > >  kernel_init+0x1c/0x2b0
> >
> > So I'm not entirely sure how the above BUG comes about, nor how this
> > selftest tickles it. Could you elaborate?
> >
> > I've found check_kcov_mode() which has this !in_task() clause, but I'm
> > not entirely sure how failing that leads to the above mentioned failure.
> 
> I've tried to explain it in the test comment, maybe I need to improve it:
> 
> +        * We set kcov_mode to enable tracing, but don't setup the area,
> +        * so any attempt to trace will crash. Note: we must not call any
> +        * potentially traced functions in this region.

Ah, I'm just slow today.. did not connect the dots. No this is fine.

> Basically, we setup current task kcov in a way that any attempt to
> trace in __sanitizer_cov_trace_pc() will crash, and then just loop
> waiting for interrupts.
> 
> A more legit way to achieve the same would be to properly setup kcov
> for tracing from within the kernel, then call outermost interrupt
> entry function, then check we traced nothing. But that would require a
> non-trivial amount of new complex code, and e.g. the top-most
> interrupt entry function is not exported and is arch-specific.

Yeah, polling jiffies should be fine I suppose.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 1/4] x86/entry: Remove unwanted instrumentation in common_interrupt()
  2024-06-19 11:19   ` Peter Zijlstra
@ 2024-06-19 13:05     ` Dmitry Vyukov
  0 siblings, 0 replies; 26+ messages in thread
From: Dmitry Vyukov @ 2024-06-19 13:05 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: tglx, mingo, bp, dave.hansen, x86, linux-kernel, syzkaller, elver,
	glider, nogikh, tarasmadan

On Wed, 19 Jun 2024 at 13:19, Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Tue, Jun 11, 2024 at 09:50:30AM +0200, Dmitry Vyukov wrote:
> > common_interrupt() and friends call kvm_set_cpu_l1tf_flush_l1d(),
> > which is not marked as noinstr nor __always_inline.
> > So compiler outlines it and adds instrumentation to it.
> > Since the call is inside of instrumentation_begin/end(),
> > objtool does not warn about it.
> >
> > The manifestation is that KCOV produces spurious coverage
> > in kvm_set_cpu_l1tf_flush_l1d() in random places because
> > the call happens when preempt count is not yet updated
> > to say that we are in an interrupt.
>
> So I'm reading spurious here, but the next patch trips BUG, them very
> much not the same thing. Which is it?

I am not sure I understand the question.
Currently kvm_set_cpu_l1tf_flush_l1d() is traced, so the added test
produces a BUG without this fix (and does not produce with this fix).
By "spurious" I meant that tracing of kvm_set_cpu_l1tf_flush_l1d() and
interrupts in general is parasitic/unwanted.

> > Mark kvm_set_cpu_l1tf_flush_l1d() as __always_inline and move
> > out of instrumentation_begin/end() section.
> > It only calls __this_cpu_write() which is already safe to call
> > in noinstr contexts.
> >
> > Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> > Reviewed-by: Alexander Potapenko <glider@google.com>
> > Fixes: 6368558c3710 ("x86/entry: Provide IDTENTRY_SYSVEC")
> > Cc: x86@kernel.org
> > Cc: linux-kernel@vger.kernel.org
> > Cc: syzkaller@googlegroups.com
>
> Anyway, the patch is fine,
>
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>
> > ---
> >  arch/x86/include/asm/hardirq.h  | 8 ++++++--
> >  arch/x86/include/asm/idtentry.h | 6 +++---
> >  2 files changed, 9 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
> > index c67fa6ad098a..6ffa8b75f4cd 100644
> > --- a/arch/x86/include/asm/hardirq.h
> > +++ b/arch/x86/include/asm/hardirq.h
> > @@ -69,7 +69,11 @@ extern u64 arch_irq_stat(void);
> >  #define local_softirq_pending_ref       pcpu_hot.softirq_pending
> >
> >  #if IS_ENABLED(CONFIG_KVM_INTEL)
> > -static inline void kvm_set_cpu_l1tf_flush_l1d(void)
> > +/*
> > + * This function is called from noinstr interrupt contexts
> > + * and must be inlined to not get instrumentation.
> > + */
> > +static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
> >  {
> >       __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
> >  }
> > @@ -84,7 +88,7 @@ static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
> >       return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
> >  }
> >  #else /* !IS_ENABLED(CONFIG_KVM_INTEL) */
> > -static inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
> > +static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
> >  #endif /* IS_ENABLED(CONFIG_KVM_INTEL) */
> >
> >  #endif /* _ASM_X86_HARDIRQ_H */
> > diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
> > index d4f24499b256..ad5c68f0509d 100644
> > --- a/arch/x86/include/asm/idtentry.h
> > +++ b/arch/x86/include/asm/idtentry.h
> > @@ -212,8 +212,8 @@ __visible noinstr void func(struct pt_regs *regs,                 \
> >       irqentry_state_t state = irqentry_enter(regs);                  \
> >       u32 vector = (u32)(u8)error_code;                               \
> >                                                                       \
> > +     kvm_set_cpu_l1tf_flush_l1d();                                   \
> >       instrumentation_begin();                                        \
> > -     kvm_set_cpu_l1tf_flush_l1d();                                   \
> >       run_irq_on_irqstack_cond(__##func, regs, vector);               \
> >       instrumentation_end();                                          \
> >       irqentry_exit(regs, state);                                     \
> > @@ -250,7 +250,6 @@ static void __##func(struct pt_regs *regs);                               \
> >                                                                       \
> >  static __always_inline void instr_##func(struct pt_regs *regs)               \
> >  {                                                                    \
> > -     kvm_set_cpu_l1tf_flush_l1d();                                   \
> >       run_sysvec_on_irqstack_cond(__##func, regs);                    \
> >  }                                                                    \
> >                                                                       \
> > @@ -258,6 +257,7 @@ __visible noinstr void func(struct pt_regs *regs)                 \
> >  {                                                                    \
> >       irqentry_state_t state = irqentry_enter(regs);                  \
> >                                                                       \
> > +     kvm_set_cpu_l1tf_flush_l1d();                                   \
> >       instrumentation_begin();                                        \
> >       instr_##func (regs);                                            \
> >       instrumentation_end();                                          \
> > @@ -288,7 +288,6 @@ static __always_inline void __##func(struct pt_regs *regs);               \
> >  static __always_inline void instr_##func(struct pt_regs *regs)               \
> >  {                                                                    \
> >       __irq_enter_raw();                                              \
> > -     kvm_set_cpu_l1tf_flush_l1d();                                   \
> >       __##func (regs);                                                \
> >       __irq_exit_raw();                                               \
> >  }                                                                    \
> > @@ -297,6 +296,7 @@ __visible noinstr void func(struct pt_regs *regs)                 \
> >  {                                                                    \
> >       irqentry_state_t state = irqentry_enter(regs);                  \
> >                                                                       \
> > +     kvm_set_cpu_l1tf_flush_l1d();                                   \
> >       instrumentation_begin();                                        \
> >       instr_##func (regs);                                            \
> >       instrumentation_end();                                          \
> > --
> > 2.45.2.505.gda0bf45e8d-goog
> >
>
> --
> You received this message because you are subscribed to the Google Groups "syzkaller" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to syzkaller+unsubscribe@googlegroups.com.
> To view this discussion on the web visit https://groups.google.com/d/msgid/syzkaller/20240619111936.GK31592%40noisy.programming.kicks-ass.net.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 4/4] x86: Ignore stack unwinding in KCOV
  2024-06-19 11:23   ` Peter Zijlstra
@ 2024-06-19 13:10     ` Dmitry Vyukov
  0 siblings, 0 replies; 26+ messages in thread
From: Dmitry Vyukov @ 2024-06-19 13:10 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: tglx, mingo, bp, dave.hansen, x86, linux-kernel, syzkaller, elver,
	glider, nogikh, tarasmadan

On Wed, 19 Jun 2024 at 13:23, Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Tue, Jun 11, 2024 at 09:50:33AM +0200, Dmitry Vyukov wrote:
> > Stack unwinding produces large amounts of uninteresting coverage.
> > It's called from KASAN kmalloc/kfree hooks, fault injection, etc.
> > It's not particularly useful and is not a function of system call args.
> > Ignore that code.
>
> This stems from KCOV's purpose being guiding syzkaller as opposed to it
> being a more general coverage tool, right?
>
> Is that spelled out anywhere?

It may be used for other similar purposes as well, e.g. collecting
unit test coverage (not the whole kernel, but a single specific test
provided that even other tests can run and collect their own coverage
in parallel).

It's spelled explicitly in the docs:

https://elixir.bootlin.com/linux/latest/source/Documentation/dev-tools/kcov.rst

"""
KCOV collects and exposes kernel code coverage information in a form suitable
for coverage-guided fuzzing .... Coverage collection is enabled on a
task basis, and
thus KCOV can capture precise coverage of a single system call.

Note that KCOV does not aim to collect as much coverage as possible. It aims
to collect more or less stable coverage that is a function of syscall inputs.
To achieve this goal, it does not collect coverage in soft/hard interrupts...
"""



> Anyway,
>
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Thanks for your reviews, Peter!


> > Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> > Reviewed-by: Alexander Potapenko <glider@google.com>
> > Reviewed-by: Marco Elver <elver@google.com>
> > Cc: x86@kernel.org
> > Cc: linux-kernel@vger.kernel.org
> > Cc: syzkaller@googlegroups.com
> > ---
> >  arch/x86/kernel/Makefile | 8 ++++++++
> >  1 file changed, 8 insertions(+)
> >
> > diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> > index 20a0dd51700a..cd49ebfae984 100644
> > --- a/arch/x86/kernel/Makefile
> > +++ b/arch/x86/kernel/Makefile
> > @@ -39,6 +39,14 @@ KMSAN_SANITIZE_sev.o                                       := n
> >  # first second.
> >  KCOV_INSTRUMENT_head$(BITS).o                                := n
> >  KCOV_INSTRUMENT_sev.o                                        := n
> > +# These are called from save_stack_trace() on debug paths,
> > +# and produce large amounts of uninteresting coverage.
> > +KCOV_INSTRUMENT_stacktrace.o                         := n
> > +KCOV_INSTRUMENT_dumpstack.o                          := n
> > +KCOV_INSTRUMENT_dumpstack_$(BITS).o                  := n
> > +KCOV_INSTRUMENT_unwind_orc.o                         := n
> > +KCOV_INSTRUMENT_unwind_frame.o                               := n
> > +KCOV_INSTRUMENT_unwind_guess.o                               := n
> >
> >  CFLAGS_irq.o := -I $(src)/../include/asm/trace
> >
> > --
> > 2.45.2.505.gda0bf45e8d-goog
> >
>
> --
> You received this message because you are subscribed to the Google Groups "syzkaller" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to syzkaller+unsubscribe@googlegroups.com.
> To view this discussion on the web visit https://groups.google.com/d/msgid/syzkaller/20240619112332.GL31592%40noisy.programming.kicks-ass.net.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 0/4] KCOV fixes
  2024-06-19  8:30     ` Borislav Petkov
@ 2024-08-05 12:52       ` Andrey Konovalov
  2024-08-08 15:18         ` Thomas Gleixner
  0 siblings, 1 reply; 26+ messages in thread
From: Andrey Konovalov @ 2024-08-05 12:52 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Dmitry Vyukov, tglx, mingo, dave.hansen, x86, linux-kernel,
	syzkaller, elver, glider, nogikh, tarasmadan

On Wed, Jun 19, 2024 at 10:31 AM Borislav Petkov <bp@alien8.de> wrote:
>
> On Wed, Jun 19, 2024 at 07:20:56AM +0200, Dmitry Vyukov wrote:
> > Or is it OK to take this via mm tree (where KCOV changes usually go)?
>
> Be patient, pls, you're on the TODO list.

Hi Borislav,

I was wondering what's the status of these patches? They didn't make
it into 6.11 and I also still don't see them in linux-next.

Thank you!

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 0/4] KCOV fixes
  2024-08-05 12:52       ` Andrey Konovalov
@ 2024-08-08 15:18         ` Thomas Gleixner
  0 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2024-08-08 15:18 UTC (permalink / raw)
  To: Andrey Konovalov, Borislav Petkov
  Cc: Dmitry Vyukov, mingo, dave.hansen, x86, linux-kernel, syzkaller,
	elver, glider, nogikh, tarasmadan

On Mon, Aug 05 2024 at 14:52, Andrey Konovalov wrote:
> On Wed, Jun 19, 2024 at 10:31 AM Borislav Petkov <bp@alien8.de> wrote:
>>
>> On Wed, Jun 19, 2024 at 07:20:56AM +0200, Dmitry Vyukov wrote:
>> > Or is it OK to take this via mm tree (where KCOV changes usually go)?
>>
>> Be patient, pls, you're on the TODO list.
>
> Hi Borislav,
>
> I was wondering what's the status of these patches? They didn't make
> it into 6.11 and I also still don't see them in linux-next.

Sorry. That fell through the cracks. I'm picking it up now.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [tip: x86/build] x86: Ignore stack unwinding in KCOV
  2024-06-11  7:50 ` [PATCH v2 4/4] x86: Ignore stack unwinding in KCOV Dmitry Vyukov
  2024-06-13 22:51   ` Andrey Konovalov
  2024-06-19 11:23   ` Peter Zijlstra
@ 2024-08-08 15:49   ` tip-bot2 for Dmitry Vyukov
  2 siblings, 0 replies; 26+ messages in thread
From: tip-bot2 for Dmitry Vyukov @ 2024-08-08 15:49 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Dmitry Vyukov, Thomas Gleixner, Alexander Potapenko, Marco Elver,
	Andrey Konovalov, Peter Zijlstra (Intel), x86, linux-kernel

The following commit has been merged into the x86/build branch of tip:

Commit-ID:     ae94b263f5f69c180347e795fbefa051b65aacc3
Gitweb:        https://git.kernel.org/tip/ae94b263f5f69c180347e795fbefa051b65aacc3
Author:        Dmitry Vyukov <dvyukov@google.com>
AuthorDate:    Tue, 11 Jun 2024 09:50:33 +02:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Thu, 08 Aug 2024 17:36:35 +02:00

x86: Ignore stack unwinding in KCOV

Stack unwinding produces large amounts of uninteresting coverage.
It's called from KASAN kmalloc/kfree hooks, fault injection, etc.
It's not particularly useful and is not a function of system call args.
Ignore that code.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Alexander Potapenko <glider@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/eaf54b8634970b73552dcd38bf9be6ef55238c10.1718092070.git.dvyukov@google.com

---
 arch/x86/kernel/Makefile | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a847180..f791898 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -35,6 +35,14 @@ KMSAN_SANITIZE_nmi.o					:= n
 # If instrumentation of the following files is enabled, boot hangs during
 # first second.
 KCOV_INSTRUMENT_head$(BITS).o				:= n
+# These are called from save_stack_trace() on debug paths,
+# and produce large amounts of uninteresting coverage.
+KCOV_INSTRUMENT_stacktrace.o				:= n
+KCOV_INSTRUMENT_dumpstack.o				:= n
+KCOV_INSTRUMENT_dumpstack_$(BITS).o			:= n
+KCOV_INSTRUMENT_unwind_orc.o				:= n
+KCOV_INSTRUMENT_unwind_frame.o				:= n
+KCOV_INSTRUMENT_unwind_guess.o				:= n
 
 CFLAGS_irq.o := -I $(src)/../include/asm/trace
 

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [tip: x86/build] module: Fix KCOV-ignored file name
  2024-06-11  7:50 ` [PATCH v2 3/4] module: Fix KCOV-ignored file name Dmitry Vyukov
  2024-06-11  9:29   ` Marco Elver
  2024-06-13 22:55   ` Andrey Konovalov
@ 2024-08-08 15:49   ` tip-bot2 for Dmitry Vyukov
  2 siblings, 0 replies; 26+ messages in thread
From: tip-bot2 for Dmitry Vyukov @ 2024-08-08 15:49 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Dmitry Vyukov, Thomas Gleixner, Alexander Potapenko, Marco Elver,
	Andrey Konovalov, stable, x86, linux-kernel

The following commit has been merged into the x86/build branch of tip:

Commit-ID:     f34d086fb7102fec895fd58b9e816b981b284c17
Gitweb:        https://git.kernel.org/tip/f34d086fb7102fec895fd58b9e816b981b284c17
Author:        Dmitry Vyukov <dvyukov@google.com>
AuthorDate:    Tue, 11 Jun 2024 09:50:32 +02:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Thu, 08 Aug 2024 17:36:35 +02:00

module: Fix KCOV-ignored file name

module.c was renamed to main.c, but the Makefile directive was copy-pasted
verbatim with the old file name.  Fix up the file name.

Fixes: cfc1d277891e ("module: Move all into module/")
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Alexander Potapenko <glider@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/bc0cf790b4839c5e38e2fafc64271f620568a39e.1718092070.git.dvyukov@google.com

---
 kernel/module/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/module/Makefile b/kernel/module/Makefile
index a10b2b9..50ffcc4 100644
--- a/kernel/module/Makefile
+++ b/kernel/module/Makefile
@@ -5,7 +5,7 @@
 
 # These are called from save_stack_trace() on slub debug path,
 # and produce insane amounts of uninteresting coverage.
-KCOV_INSTRUMENT_module.o := n
+KCOV_INSTRUMENT_main.o := n
 
 obj-y += main.o
 obj-y += strict_rwx.o

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [tip: x86/build] kcov: Add interrupt handling self test
  2024-06-11  7:50 ` [PATCH v2 2/4] kcov: add interrupt handling self test Dmitry Vyukov
                     ` (2 preceding siblings ...)
  2024-06-19 11:13   ` Peter Zijlstra
@ 2024-08-08 15:49   ` tip-bot2 for Dmitry Vyukov
  3 siblings, 0 replies; 26+ messages in thread
From: tip-bot2 for Dmitry Vyukov @ 2024-08-08 15:49 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Dmitry Vyukov, Thomas Gleixner, Alexander Potapenko, Marco Elver,
	Andrey Konovalov, x86, linux-kernel

The following commit has been merged into the x86/build branch of tip:

Commit-ID:     6cd0dd934b03d4ee4094ac474108723e2f2ed7d6
Gitweb:        https://git.kernel.org/tip/6cd0dd934b03d4ee4094ac474108723e2f2ed7d6
Author:        Dmitry Vyukov <dvyukov@google.com>
AuthorDate:    Tue, 11 Jun 2024 09:50:31 +02:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Thu, 08 Aug 2024 17:36:35 +02:00

kcov: Add interrupt handling self test

Add a boot self test that can catch sprious coverage from interrupts.
The coverage callback filters out interrupt code, but only after the
handler updates preempt count. Some code periodically leaks out
of that section and leads to spurious coverage.
Add a best-effort (but simple) test that is likely to catch such bugs.
If the test is enabled on CI systems that use KCOV, they should catch
any issues fast.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Alexander Potapenko <glider@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Link: https://lore.kernel.org/all/7662127c97e29da1a748ad1c1539dd7b65b737b2.1718092070.git.dvyukov@google.com


---
 kernel/kcov.c     | 31 +++++++++++++++++++++++++++++++
 lib/Kconfig.debug |  8 ++++++++
 2 files changed, 39 insertions(+)

diff --git a/kernel/kcov.c b/kernel/kcov.c
index f0a69d4..d9d4a0c 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -11,6 +11,7 @@
 #include <linux/fs.h>
 #include <linux/hashtable.h>
 #include <linux/init.h>
+#include <linux/jiffies.h>
 #include <linux/kmsan-checks.h>
 #include <linux/mm.h>
 #include <linux/preempt.h>
@@ -1058,6 +1059,32 @@ u64 kcov_common_handle(void)
 }
 EXPORT_SYMBOL(kcov_common_handle);
 
+#ifdef CONFIG_KCOV_SELFTEST
+static void __init selftest(void)
+{
+	unsigned long start;
+
+	pr_err("running self test\n");
+	/*
+	 * Test that interrupts don't produce spurious coverage.
+	 * The coverage callback filters out interrupt code, but only
+	 * after the handler updates preempt count. Some code periodically
+	 * leaks out of that section and leads to spurious coverage.
+	 * It's hard to call the actual interrupt handler directly,
+	 * so we just loop here for a bit waiting for a timer interrupt.
+	 * We set kcov_mode to enable tracing, but don't setup the area,
+	 * so any attempt to trace will crash. Note: we must not call any
+	 * potentially traced functions in this region.
+	 */
+	start = jiffies;
+	current->kcov_mode = KCOV_MODE_TRACE_PC;
+	while ((jiffies - start) * MSEC_PER_SEC / HZ < 300)
+		;
+	current->kcov_mode = 0;
+	pr_err("done running self test\n");
+}
+#endif
+
 static int __init kcov_init(void)
 {
 	int cpu;
@@ -1077,6 +1104,10 @@ static int __init kcov_init(void)
 	 */
 	debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops);
 
+#ifdef CONFIG_KCOV_SELFTEST
+	selftest();
+#endif
+
 	return 0;
 }
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a30c03a..270e367 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2173,6 +2173,14 @@ config KCOV_IRQ_AREA_SIZE
 	  soft interrupts. This specifies the size of those areas in the
 	  number of unsigned long words.
 
+config KCOV_SELFTEST
+	bool "Perform short selftests on boot"
+	depends on KCOV
+	help
+	  Run short KCOV coverage collection selftests on boot.
+	  On test failure, causes the kernel to panic. Recommended to be
+	  enabled, ensuring critical functionality works as intended.
+
 menuconfig RUNTIME_TESTING_MENU
 	bool "Runtime Testing"
 	default y

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [tip: x86/build] x86/entry: Remove unwanted instrumentation in common_interrupt()
  2024-06-11  7:50 ` [PATCH v2 1/4] x86/entry: Remove unwanted instrumentation in common_interrupt() Dmitry Vyukov
  2024-06-19 11:19   ` Peter Zijlstra
@ 2024-08-08 15:49   ` tip-bot2 for Dmitry Vyukov
  1 sibling, 0 replies; 26+ messages in thread
From: tip-bot2 for Dmitry Vyukov @ 2024-08-08 15:49 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Dmitry Vyukov, Thomas Gleixner, Alexander Potapenko,
	Peter Zijlstra (Intel), stable, x86, linux-kernel

The following commit has been merged into the x86/build branch of tip:

Commit-ID:     477d81a1c47a1b79b9c08fc92b5dea3c5143800b
Gitweb:        https://git.kernel.org/tip/477d81a1c47a1b79b9c08fc92b5dea3c5143800b
Author:        Dmitry Vyukov <dvyukov@google.com>
AuthorDate:    Tue, 11 Jun 2024 09:50:30 +02:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Thu, 08 Aug 2024 17:36:35 +02:00

x86/entry: Remove unwanted instrumentation in common_interrupt()

common_interrupt() and related variants call kvm_set_cpu_l1tf_flush_l1d(),
which is neither marked noinstr nor __always_inline.

So compiler puts it out of line and adds instrumentation to it.  Since the
call is inside of instrumentation_begin/end(), objtool does not warn about
it.

The manifestation is that KCOV produces spurious coverage in
kvm_set_cpu_l1tf_flush_l1d() in random places because the call happens when
preempt count is not yet updated to say that the kernel is in an interrupt.

Mark kvm_set_cpu_l1tf_flush_l1d() as __always_inline and move it out of the
instrumentation_begin/end() section.  It only calls __this_cpu_write()
which is already safe to call in noinstr contexts.

Fixes: 6368558c3710 ("x86/entry: Provide IDTENTRY_SYSVEC")
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Alexander Potapenko <glider@google.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/3f9a1de9e415fcb53d07dc9e19fa8481bb021b1b.1718092070.git.dvyukov@google.com

---
 arch/x86/include/asm/hardirq.h  | 8 ++++++--
 arch/x86/include/asm/idtentry.h | 6 +++---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index c67fa6a..6ffa8b7 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -69,7 +69,11 @@ extern u64 arch_irq_stat(void);
 #define local_softirq_pending_ref       pcpu_hot.softirq_pending
 
 #if IS_ENABLED(CONFIG_KVM_INTEL)
-static inline void kvm_set_cpu_l1tf_flush_l1d(void)
+/*
+ * This function is called from noinstr interrupt contexts
+ * and must be inlined to not get instrumentation.
+ */
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
 {
 	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
 }
@@ -84,7 +88,7 @@ static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
 	return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
 }
 #else /* !IS_ENABLED(CONFIG_KVM_INTEL) */
-static inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
 #endif /* IS_ENABLED(CONFIG_KVM_INTEL) */
 
 #endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index d4f2449..ad5c68f 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -212,8 +212,8 @@ __visible noinstr void func(struct pt_regs *regs,			\
 	irqentry_state_t state = irqentry_enter(regs);			\
 	u32 vector = (u32)(u8)error_code;				\
 									\
+	kvm_set_cpu_l1tf_flush_l1d();                                   \
 	instrumentation_begin();					\
-	kvm_set_cpu_l1tf_flush_l1d();					\
 	run_irq_on_irqstack_cond(__##func, regs, vector);		\
 	instrumentation_end();						\
 	irqentry_exit(regs, state);					\
@@ -250,7 +250,6 @@ static void __##func(struct pt_regs *regs);				\
 									\
 static __always_inline void instr_##func(struct pt_regs *regs)		\
 {									\
-	kvm_set_cpu_l1tf_flush_l1d();					\
 	run_sysvec_on_irqstack_cond(__##func, regs);			\
 }									\
 									\
@@ -258,6 +257,7 @@ __visible noinstr void func(struct pt_regs *regs)			\
 {									\
 	irqentry_state_t state = irqentry_enter(regs);			\
 									\
+	kvm_set_cpu_l1tf_flush_l1d();                                   \
 	instrumentation_begin();					\
 	instr_##func (regs);						\
 	instrumentation_end();						\
@@ -288,7 +288,6 @@ static __always_inline void __##func(struct pt_regs *regs);		\
 static __always_inline void instr_##func(struct pt_regs *regs)		\
 {									\
 	__irq_enter_raw();						\
-	kvm_set_cpu_l1tf_flush_l1d();					\
 	__##func (regs);						\
 	__irq_exit_raw();						\
 }									\
@@ -297,6 +296,7 @@ __visible noinstr void func(struct pt_regs *regs)			\
 {									\
 	irqentry_state_t state = irqentry_enter(regs);			\
 									\
+	kvm_set_cpu_l1tf_flush_l1d();                                   \
 	instrumentation_begin();					\
 	instr_##func (regs);						\
 	instrumentation_end();						\

^ permalink raw reply related	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2024-08-08 15:49 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-06-11  7:50 [PATCH v2 0/4] KCOV fixes Dmitry Vyukov
2024-06-11  7:50 ` [PATCH v2 1/4] x86/entry: Remove unwanted instrumentation in common_interrupt() Dmitry Vyukov
2024-06-19 11:19   ` Peter Zijlstra
2024-06-19 13:05     ` Dmitry Vyukov
2024-08-08 15:49   ` [tip: x86/build] " tip-bot2 for Dmitry Vyukov
2024-06-11  7:50 ` [PATCH v2 2/4] kcov: add interrupt handling self test Dmitry Vyukov
2024-06-11  9:29   ` Marco Elver
2024-06-13 23:01   ` Andrey Konovalov
2024-06-19 11:13   ` Peter Zijlstra
2024-06-19 11:18     ` Dmitry Vyukov
2024-06-19 11:26       ` Peter Zijlstra
2024-08-08 15:49   ` [tip: x86/build] kcov: Add " tip-bot2 for Dmitry Vyukov
2024-06-11  7:50 ` [PATCH v2 3/4] module: Fix KCOV-ignored file name Dmitry Vyukov
2024-06-11  9:29   ` Marco Elver
2024-06-13 22:55   ` Andrey Konovalov
2024-08-08 15:49   ` [tip: x86/build] " tip-bot2 for Dmitry Vyukov
2024-06-11  7:50 ` [PATCH v2 4/4] x86: Ignore stack unwinding in KCOV Dmitry Vyukov
2024-06-13 22:51   ` Andrey Konovalov
2024-06-19 11:23   ` Peter Zijlstra
2024-06-19 13:10     ` Dmitry Vyukov
2024-08-08 15:49   ` [tip: x86/build] " tip-bot2 for Dmitry Vyukov
2024-06-11  9:31 ` [PATCH v2 0/4] KCOV fixes Dmitry Vyukov
2024-06-19  5:20   ` Dmitry Vyukov
2024-06-19  8:30     ` Borislav Petkov
2024-08-05 12:52       ` Andrey Konovalov
2024-08-08 15:18         ` Thomas Gleixner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox