linux-arm-kernel.lists.infradead.org archive mirror
* [PATCH v2] tracing: Enable kprobe for selected Arm64 assembly
@ 2025-11-03 18:52 Ben Niu
  2025-11-17 10:14 ` Mark Rutland
  0 siblings, 1 reply; 3+ messages in thread
From: Ben Niu @ 2025-11-03 18:52 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: Mark Rutland, Will Deacon, tytso, Jason, Ben Niu,
	linux-arm-kernel, linux-kernel

When ftrace is enabled, a function can only be kprobe'd if
  (1) the function has proper nops inserted at the beginning
  (2) the first inserted nop's address is added in section
      __patchable_function_entries.

See function within_notrace_func in kernel/trace/trace_kprobe.c
for more details.
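
For example, creating a kprobe trace event on an asm function that
lacks such entries (e.g. __arch_copy_from_user before this patch) is
rejected by default (tracefs assumed at /sys/kernel/tracing, event name
arbitrary):

  echo 'p:cfu __arch_copy_from_user' >> /sys/kernel/tracing/kprobe_events

The write fails with EINVAL and the kernel logs a warning about probing
a notrace function.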

This patch adds a new asm function macro SYM_FUNC_START_TRACE
that makes an asm function satisfy the above two conditions so that
it can be kprobe'd. In addition, the macro is applied to
__arch_copy_to_user and __arch_copy_from_user, which were found
to be hot in certain workloads.

Note: although this patch unblocks kprobe tracing, fentry is still
broken because no BTF info gets generated from assembly files. A
separate patch is needed to fix that.

This patch was built with different combinations of the following
options, and kprobe tracing of the asm functions __arch_copy_to_user
and __arch_copy_from_user worked in all cases (an example invocation
is sketched after the list):

CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
CONFIG_DYNAMIC_FTRACE_WITH_ARGS
CONFIG_ARM64_BTI_KERNEL
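
Such tracing can be exercised roughly as follows (tracefs assumed at
/sys/kernel/tracing; the event name "ctu" is arbitrary; the size
argument of __arch_copy_to_user is read from x2):

  echo 'p:ctu __arch_copy_to_user size=%x2' >> /sys/kernel/tracing/kprobe_events
  echo 1 > /sys/kernel/tracing/events/kprobes/ctu/enable
  cat /sys/kernel/tracing/trace_pipe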

Signed-off-by: Ben Niu <benniu@meta.com>
---
v2: only opt in __arch_copy_from_user and __arch_copy_to_user for
    tracing, since tracing asm functions in general may be unsafe

v1: aQOpd14RXnyQLOWR@meta.com
---
 arch/arm64/include/asm/linkage.h | 91 ++++++++++++++++++++++++--------
 arch/arm64/lib/copy_from_user.S  |  2 +-
 arch/arm64/lib/copy_to_user.S    |  2 +-
 3 files changed, 70 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index d3acd9c87509..b9a7513dc7d9 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -5,8 +5,47 @@
 #include <asm/assembler.h>
 #endif
 
-#define __ALIGN		.balign CONFIG_FUNCTION_ALIGNMENT
-#define __ALIGN_STR	".balign " #CONFIG_FUNCTION_ALIGNMENT
+#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT
+#define __ALIGN_STR ".balign " #CONFIG_FUNCTION_ALIGNMENT
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+
+#define PRE_FUNCTION_NOPS                                                   \
+	ALIGN;                                                              \
+	nops CONFIG_FUNCTION_ALIGNMENT / 4 - 2;                             \
+	.pushsection __patchable_function_entries, "awo", @progbits, .text; \
+	.p2align 3;                                                         \
+	.8byte 1f;                                                          \
+	.popsection;                                                        \
+	1 :;                                                                \
+	nops 2;
+
+#define PRE_PROLOGUE_NOPS nops 2;
+
+#elif defined(CONFIG_DYNAMIC_FTRACE_WITH_ARGS)
+
+#define PRE_FUNCTION_NOPS
+
+#define PRE_PROLOGUE_NOPS                                                   \
+	.pushsection __patchable_function_entries, "awo", @progbits, .text; \
+	.p2align 3;                                                         \
+	.8byte 1f;                                                          \
+	.popsection;                                                        \
+	1 :;                                                                \
+	nops 2;
+
+#else
+
+#define PRE_FUNCTION_NOPS
+#define PRE_PROLOGUE_NOPS
+
+#endif
+
+#ifdef CONFIG_ARM64_BTI_KERNEL
+#define BTI_C bti c;
+#else
+#define BTI_C
+#endif
 
 /*
  * When using in-kernel BTI we need to ensure that PCS-conformant
@@ -15,32 +54,38 @@
  * everything, the override is done unconditionally so we're more
  * likely to notice any drift from the overridden definitions.
  */
-#define SYM_FUNC_START(name)				\
-	SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)	\
-	bti c ;
+#define SYM_FUNC_START_TRACE(name)                 \
+	PRE_FUNCTION_NOPS                          \
+	SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \
+	BTI_C                                      \
+	PRE_PROLOGUE_NOPS
+
+#define SYM_FUNC_START(name)                       \
+	SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \
+	BTI_C
 
-#define SYM_FUNC_START_NOALIGN(name)			\
-	SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE)	\
-	bti c ;
+#define SYM_FUNC_START_NOALIGN(name)              \
+	SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \
+	BTI_C                                     \
 
-#define SYM_FUNC_START_LOCAL(name)			\
-	SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)	\
-	bti c ;
+#define SYM_FUNC_START_LOCAL(name)                \
+	SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \
+	BTI_C                                     \
 
-#define SYM_FUNC_START_LOCAL_NOALIGN(name)		\
-	SYM_START(name, SYM_L_LOCAL, SYM_A_NONE)	\
-	bti c ;
+#define SYM_FUNC_START_LOCAL_NOALIGN(name)       \
+	SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \
+	BTI_C                                    \
 
-#define SYM_FUNC_START_WEAK(name)			\
-	SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN)	\
-	bti c ;
+#define SYM_FUNC_START_WEAK(name)                \
+	SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \
+	BTI_C                                    \
 
-#define SYM_FUNC_START_WEAK_NOALIGN(name)		\
-	SYM_START(name, SYM_L_WEAK, SYM_A_NONE)		\
-	bti c ;
+#define SYM_FUNC_START_WEAK_NOALIGN(name)       \
+	SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \
+	BTI_C                                   \
 
-#define SYM_TYPED_FUNC_START(name)				\
-	SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)	\
-	bti c ;
+#define SYM_TYPED_FUNC_START(name)                       \
+	SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \
+	BTI_C                                            \
 
 #endif
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 400057d607ec..07b6d6c9573f 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -61,7 +61,7 @@
 
 end	.req	x5
 srcin	.req	x15
-SYM_FUNC_START(__arch_copy_from_user)
+SYM_FUNC_START_TRACE(__arch_copy_from_user)
 	add	end, x0, x2
 	mov	srcin, x1
 #include "copy_template.S"
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 819f2e3fc7a9..e7e663aea8ca 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -60,7 +60,7 @@
 
 end	.req	x5
 srcin	.req	x15
-SYM_FUNC_START(__arch_copy_to_user)
+SYM_FUNC_START_TRACE(__arch_copy_to_user)
 	add	end, x0, x2
 	mov	srcin, x1
 #include "copy_template.S"
-- 
2.47.3




* Re: [PATCH v2] tracing: Enable kprobe for selected Arm64 assembly
  2025-11-03 18:52 [PATCH v2] tracing: Enable kprobe for selected Arm64 assembly Ben Niu
@ 2025-11-17 10:14 ` Mark Rutland
  2025-12-10  3:32   ` Withdraw " Ben Niu
  0 siblings, 1 reply; 3+ messages in thread
From: Mark Rutland @ 2025-11-17 10:14 UTC (permalink / raw)
  To: Ben Niu
  Cc: Catalin Marinas, Will Deacon, tytso, Jason, Ben Niu,
	linux-arm-kernel, linux-kernel

On Mon, Nov 03, 2025 at 10:52:35AM -0800, Ben Niu wrote:
> When ftrace is enabled, a function can only be kprobe'd if
>   (1) the function has proper nops inserted at the beginning
>   (2) the first inserted nop's address is added in section
>       __patchable_function_entries.
>
> See function within_notrace_func in kernel/trace/trace_kprobe.c
> for more details.

As mentioned last time, this isn't accurate, and this is at the wrong
level of abstraction. You're conflating kprobes with kprobes-based trace
events, and you're describing the implementation details of ftrace rather
than the logical situation that the function needs to be traceable via
ftrace.

This would be better summarized as:

  While kprobes can be placed on most kernel functions, kprobes-based
  trace events can only be placed on functions which are traceable via
  ftrace (unless CONFIG_KPROBE_EVENTS_ON_NOTRACE=y).

IIUC from last time you only want this so that you can introspect
__arch_copy_to_user() and __arch_copy_from_user(), so why can't you
select CONFIG_KPROBE_EVENTS_ON_NOTRACE in your test kernel? That would
require zero kernel changes AFAICT.
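
(For reference, that is just CONFIG_KPROBE_EVENTS_ON_NOTRACE=y in the
test kernel's configuration.)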

I'm not keen on doing this unless absolutely necessary, and as above it
looks like we already have suitable options to make this possible for
your use-case.

> This patch adds a new asm function macro SYM_FUNC_START_TRACE
> that makes an asm function satisfy the above two conditions so that
> it can be kprobe'd. In addition, the macro is applied to
> __arch_copy_to_user and __arch_copy_from_user, which were found
> to be hot in certain workloads.
> 
> Note: although this patch unblocks kprobe tracing, fentry is still
> broken because no BTF info gets generated from assembly files. A
> separate patch is needed to fix that.

As above, I'm not keen on doing this, and if it's largely incomplete, I
think that's another nail in the coffin.

[...]

> +#ifdef CONFIG_ARM64_BTI_KERNEL
> +#define BTI_C bti c;
> +#else
> +#define BTI_C
> +#endif

Please note that we deliberately chose to always output BTI for asm
functions to avoid performance variability depending on whether BTI was
enabled, so if we're going to change that, we must do that as a
preparatory step with a clear rationale.

Mark.



* Withdraw [PATCH v2] tracing: Enable kprobe for selected Arm64 assembly
  2025-11-17 10:14 ` Mark Rutland
@ 2025-12-10  3:32   ` Ben Niu
  0 siblings, 0 replies; 3+ messages in thread
From: Ben Niu @ 2025-12-10  3:32 UTC (permalink / raw)
  To: Mark Rutland
  Cc: Ben Niu, Ben Niu, Catalin Marinas, Will Deacon, tytso, Jason,
	linux-arm-kernel, linux-kernel

On Mon, Nov 17, 2025 at 10:14:25AM +0000, Mark Rutland wrote:
> On Mon, Nov 03, 2025 at 10:52:35AM -0800, Ben Niu wrote:
> > When ftrace is enabled, a function can only be kprobe'd if
> >   (1) the function has proper nops inserted at the beginning
> >   (2) the first inserted nop's address is added in section
> >       __patchable_function_entries.
> >
> > See function within_notrace_func in kernel/trace/trace_kprobe.c
> > for more details.
> 
> As mentioned last time, this isn't accurate, and this is at the wrong
> level of abstraction. You're conflating kprobes with kprobes-based trace
> events, and you're describing the implementation details of ftrace rather
> than the logical situation that the function needs to be traceable via
> ftrace.
> 
> This would be better summarized as:
> 
>   While kprobes can be placed on most kernel functions, kprobes-based
>   trace events can only be placed on functions which are traceable via
>   ftrace (unless CONFIG_KPROBE_EVENTS_ON_NOTRACE=y).

Thanks. I'm withdrawing this patch because my colleague suggested
a workaround that could trace __arch_copy_to_user/__arch_copy_from_user
without any changes to the kernel:

bpftrace -q -e '
BEGIN {
    printf("Attaching to __arch_copy_to_user...\n");
}

watchpoint:'"0x$(grep ' __arch_copy_to_user$' /proc/kallsyms | awk '{print $1}')"':4:x
{
    $n = reg("r2");
    @__arch_copy_to_user_sizes = hist($n);
}'
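
(The watchpoint fires on execution of the function's entry address, so
it needs no ftrace patch site; the copy size is read from the third
argument register x2 via reg("r2"), and the collected histogram is
printed when the script exits.)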

> IIUC from last time you only want this so that you can introspect
> __arch_copy_to_user() and __arch_copy_from_user(), so why can't you
> select CONFIG_KPROBE_EVENTS_ON_NOTRACE in your test kernel? That would
> require zero kernel changes AFAICT.

CONFIG_KPROBE_EVENTS_ON_NOTRACE has a help text (see
https://github.com/torvalds/linux/blob/c9b47175e9131118e6f221cc8fb81397d62e7c91/kernel/trace/Kconfig#L798)
that discourages enabling it in production, so we didn't turn it on.
 
> I'm not keen on doing this unless absolutely necessary, and as above it
> looks like we already have suitable options to make this possible for
> your use-case.

This patch is no longer needed.
 
> > This patch adds a new asm function macro SYM_FUNC_START_TRACE
> > that makes an asm function satisfy the above two conditions so that
> > it can be kprobe'd. In addition, the macro is applied to
> > __arch_copy_to_user and __arch_copy_from_user, which were found
> > to be hot in certain workloads.
> > 
> > Note: although this patch unblocks kprobe tracing, fentry is still
> > broken because no BTF info gets generated from assembly files. A
> > separate patch is needed to fix that.
> 
> As above, I'm not keen on doing this, and if it's largely incomplete, I
> think that's another nail in the coffin.
> 
> [...]
> 
> > +#ifdef CONFIG_ARM64_BTI_KERNEL
> > +#define BTI_C bti c;
> > +#else
> > +#define BTI_C
> > +#endif
> 
> Please note that we deliberately chose to always output BTI for asm
> functions to avoid performance variability depending on whether BTI was
> enabled, so if we're going to change that, we must do that as a
> preparatory step with a clear rationale.

Ok, thanks for clarifying that.

> Mark.

