BPF List
 help / color / mirror / Atom feed
* [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
@ 2024-10-23 21:04 Vadim Fedorenko
  2024-10-23 21:04 ` [PATCH bpf-next 2/2] selftests/bpf: add selftest to check rdtsc jit Vadim Fedorenko
                   ` (4 more replies)
  0 siblings, 5 replies; 10+ messages in thread
From: Vadim Fedorenko @ 2024-10-23 21:04 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Eduard Zingerman, Thomas Gleixner, Vadim Fedorenko
  Cc: x86, bpf, Vadim Fedorenko

New kfunc to return an ARCH-specific timecounter. For x86, the BPF JIT
converts it into an ordered rdtsc call. Other architectures will get a
JIT implementation too if supported. The fallback is to
__arch_get_hw_counter().

Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
---
 arch/x86/net/bpf_jit_comp.c   | 23 +++++++++++++++++++++++
 arch/x86/net/bpf_jit_comp32.c | 11 +++++++++++
 kernel/bpf/helpers.c          |  7 +++++++
 kernel/bpf/verifier.c         | 11 +++++++++++
 4 files changed, 52 insertions(+)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 06b080b61aa5..55595a0fa55b 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -2126,6 +2126,29 @@ st:			if (is_imm8(insn->off))
 		case BPF_JMP | BPF_CALL: {
 			u8 *ip = image + addrs[i - 1];
 
+			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) {
+				if (insn->dst_reg == 1) {
+					struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id());
+
+					/* Save RDX because RDTSC will use EDX:EAX to return u64 */
+					emit_mov_reg(&prog, true, AUX_REG, BPF_REG_3);
+					if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC))
+						EMIT_LFENCE();
+					EMIT2(0x0F, 0x31);
+
+					/* shl RDX, 32 */
+					maybe_emit_1mod(&prog, BPF_REG_3, true);
+					EMIT3(0xC1, add_1reg(0xE0, BPF_REG_3), 32);
+					/* or RAX, RDX */
+					maybe_emit_mod(&prog, BPF_REG_0, BPF_REG_3, true);
+					EMIT2(0x09, add_2reg(0xC0, BPF_REG_0, BPF_REG_3));
+					/* restore RDX from R11 */
+					emit_mov_reg(&prog, true, BPF_REG_3, AUX_REG);
+
+					break;
+				}
+			}
+
 			func = (u8 *) __bpf_call_base + imm32;
 			if (tail_call_reachable) {
 				LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth);
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index de0f9e5f9f73..c36ff18a044b 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -2091,6 +2091,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			if (insn->src_reg == BPF_PSEUDO_CALL)
 				goto notyet;
 
+			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) {
+				if (insn->dst_reg == 1) {
+					struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id());
+
+					if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC))
+						EMIT3(0x0F, 0xAE, 0xE8);
+					EMIT2(0x0F, 0x31);
+					break;
+				}
+			}
+
 			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
 				int err;
 
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5c3fdb29c1b1..6624b2465484 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -23,6 +23,7 @@
 #include <linux/btf_ids.h>
 #include <linux/bpf_mem_alloc.h>
 #include <linux/kasan.h>
+#include <asm/vdso/gettimeofday.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -3023,6 +3024,11 @@ __bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user
 	return ret + 1;
 }
 
+__bpf_kfunc int bpf_get_hw_counter(void)
+{
+	return __arch_get_hw_counter(1, NULL);
+}
+
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(generic_btf_ids)
@@ -3112,6 +3118,7 @@ BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
 BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
 BTF_ID_FLAGS(func, bpf_get_kmem_cache)
+BTF_ID_FLAGS(func, bpf_get_hw_counter, KF_FASTCALL)
 BTF_KFUNCS_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f514247ba8ba..5f0e4f91ce48 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11260,6 +11260,7 @@ enum special_kfunc_type {
 	KF_bpf_iter_css_task_new,
 	KF_bpf_session_cookie,
 	KF_bpf_get_kmem_cache,
+	KF_bpf_get_hw_counter,
 };
 
 BTF_SET_START(special_kfunc_set)
@@ -11326,6 +11327,7 @@ BTF_ID(func, bpf_session_cookie)
 BTF_ID_UNUSED
 #endif
 BTF_ID(func, bpf_get_kmem_cache)
+BTF_ID(func, bpf_get_hw_counter)
 
 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
 {
@@ -20396,6 +20398,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
 		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
 		*cnt = 1;
+	} else if (IS_ENABLED(CONFIG_X86) &&
+		   desc->func_id == special_kfunc_list[KF_bpf_get_hw_counter]) {
+		insn->imm = 0;
+		insn->code = BPF_JMP | BPF_CALL;
+		insn->src_reg = BPF_PSEUDO_KFUNC_CALL;
+		insn->dst_reg = 1; /* Implement enum for inlined fast calls */
+
+		insn_buf[0] = *insn;
+		*cnt = 1;
 	} else if (is_bpf_wq_set_callback_impl_kfunc(desc->func_id)) {
 		struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(BPF_REG_4, (long)env->prog->aux) };
 
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH bpf-next 2/2] selftests/bpf: add selftest to check rdtsc jit
  2024-10-23 21:04 [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc Vadim Fedorenko
@ 2024-10-23 21:04 ` Vadim Fedorenko
  2024-10-24  1:39 ` [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc Alexei Starovoitov
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Vadim Fedorenko @ 2024-10-23 21:04 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Eduard Zingerman, Thomas Gleixner, Vadim Fedorenko
  Cc: x86, bpf, Vadim Fedorenko

get_hw_counter() is replaced with the rdtsc instruction on x86_64. Add
tests to check it.

Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
---
 .../selftests/bpf/prog_tests/verifier.c       |  2 +
 .../selftests/bpf/progs/verifier_rdtsc.c      | 63 +++++++++++++++++++
 2 files changed, 65 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_rdtsc.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index e26b5150fc43..cf7417465010 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -95,6 +95,7 @@
 #include "verifier_xdp_direct_packet_access.skel.h"
 #include "verifier_bits_iter.skel.h"
 #include "verifier_lsm.skel.h"
+#include "verifier_rdtsc.skel.h"
 
 #define MAX_ENTRIES 11
 
@@ -220,6 +221,7 @@ void test_verifier_xdp(void)                  { RUN(verifier_xdp); }
 void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); }
 void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); }
 void test_verifier_lsm(void)                  { RUN(verifier_lsm); }
+void test_verifier_rdtsc(void)                { RUN(verifier_rdtsc); }
 
 static int init_test_val_map(struct bpf_object *obj, char *map_name)
 {
diff --git a/tools/testing/selftests/bpf/progs/verifier_rdtsc.c b/tools/testing/selftests/bpf/progs/verifier_rdtsc.c
new file mode 100644
index 000000000000..2cc52a1c4ca0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_rdtsc.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Inc. */
+#include "vmlinux.h"
+// #include <time.h>
+// #include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+//u64 g1 =0, g2 = 0, g3 = 0;
+
+SEC("syscall")
+__arch_x86_64
+__xlated("0: call kernel-function")
+__naked int bpf_rdtsc(void)
+{
+	asm volatile(
+	"call %[bpf_get_hw_counter];"
+	"exit"
+	:
+	: __imm(bpf_get_hw_counter)
+	: __clobber_all
+	);
+}
+
+SEC("syscall")
+__arch_x86_64
+/* program entry for bpf_rdtsc_jit_x86_64(), regular function prologue */
+__jited("	endbr64")
+__jited("	nopl	(%rax,%rax)")
+__jited("	nopl	(%rax)")
+__jited("	pushq	%rbp")
+__jited("	movq	%rsp, %rbp")
+__jited("	endbr64")
+/* save RDX in R11 as it will be overwritten */
+__jited("	movq	%rdx, %r11")
+/* lfence may not be executed depending on cpu features */
+__jited("	{{(lfence|)}}")
+__jited("	rdtsc")
+/* combine EDX:EAX into RAX */
+__jited("	shlq	${{(32|0x20)}}, %rdx")
+__jited("	orq	%rdx, %rax")
+/* restore RDX from R11 */
+__jited("	movq	%r11, %rdx")
+__jited("	leave")
+__naked int bpf_rdtsc_jit_x86_64(void)
+{
+	asm volatile(
+	"call %[bpf_get_hw_counter];"
+	"exit"
+	:
+	: __imm(bpf_get_hw_counter)
+	: __clobber_all
+	);
+}
+
+
+void rdtsc(void)
+{
+	bpf_get_hw_counter();
+}
+
+char _license[] SEC("license") = "GPL";
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
  2024-10-23 21:04 [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc Vadim Fedorenko
  2024-10-23 21:04 ` [PATCH bpf-next 2/2] selftests/bpf: add selftest to check rdtsc jit Vadim Fedorenko
@ 2024-10-24  1:39 ` Alexei Starovoitov
  2024-10-24 13:11   ` Vadim Fedorenko
  2024-10-24 16:03 ` kernel test robot
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 10+ messages in thread
From: Alexei Starovoitov @ 2024-10-24  1:39 UTC (permalink / raw)
  To: Vadim Fedorenko
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Eduard Zingerman, Thomas Gleixner, Vadim Fedorenko, X86 ML, bpf

On Wed, Oct 23, 2024 at 2:05 PM Vadim Fedorenko <vadfed@meta.com> wrote:
>
> New kfunc to return ARCH-specific timecounter. For x86 BPF JIT converts
> it into rdtsc ordered call. Other architectures will get JIT
> implementation too if supported. The fallback is to
> __arch_get_hw_counter().

arch_get_hw_counter is a great idea.

> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
> ---
>  arch/x86/net/bpf_jit_comp.c   | 23 +++++++++++++++++++++++
>  arch/x86/net/bpf_jit_comp32.c | 11 +++++++++++
>  kernel/bpf/helpers.c          |  7 +++++++
>  kernel/bpf/verifier.c         | 11 +++++++++++
>  4 files changed, 52 insertions(+)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 06b080b61aa5..55595a0fa55b 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -2126,6 +2126,29 @@ st:                      if (is_imm8(insn->off))
>                 case BPF_JMP | BPF_CALL: {
>                         u8 *ip = image + addrs[i - 1];
>
> +                       if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) {
> +                               if (insn->dst_reg == 1) {
> +                                       struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id());
> +
> +                                       /* Save RDX because RDTSC will use EDX:EAX to return u64 */
> +                                       emit_mov_reg(&prog, true, AUX_REG, BPF_REG_3);
> +                                       if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC))
> +                                               EMIT_LFENCE();
> +                                       EMIT2(0x0F, 0x31);
> +
> +                                       /* shl RDX, 32 */
> +                                       maybe_emit_1mod(&prog, BPF_REG_3, true);
> +                                       EMIT3(0xC1, add_1reg(0xE0, BPF_REG_3), 32);
> +                                       /* or RAX, RDX */
> +                                       maybe_emit_mod(&prog, BPF_REG_0, BPF_REG_3, true);
> +                                       EMIT2(0x09, add_2reg(0xC0, BPF_REG_0, BPF_REG_3));
> +                                       /* restore RDX from R11 */
> +                                       emit_mov_reg(&prog, true, BPF_REG_3, AUX_REG);

This doesn't match
static inline u64 __arch_get_hw_counter(s32 clock_mode,
                                        const struct vdso_data *vd)
{
        if (likely(clock_mode == VDSO_CLOCKMODE_TSC))
                return (u64)rdtsc_ordered() & S64_MAX;

- & is missing
- rdtsc vs rdtscp

but the latter one is much slower (I was told).

So maybe instead of arch_get_hw_counter() it should be modelled
as JIT of sched_clock() ?

> +
> +                                       break;
> +                               }
> +                       }
> +
>                         func = (u8 *) __bpf_call_base + imm32;
>                         if (tail_call_reachable) {
>                                 LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth);
> diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
> index de0f9e5f9f73..c36ff18a044b 100644
> --- a/arch/x86/net/bpf_jit_comp32.c
> +++ b/arch/x86/net/bpf_jit_comp32.c
> @@ -2091,6 +2091,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
>                         if (insn->src_reg == BPF_PSEUDO_CALL)
>                                 goto notyet;
>
> +                       if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) {
> +                               if (insn->dst_reg == 1) {
> +                                       struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id());
> +
> +                                       if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC))
> +                                               EMIT3(0x0F, 0xAE, 0xE8);
> +                                       EMIT2(0x0F, 0x31);
> +                                       break;
> +                               }
> +                       }
> +
>                         if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
>                                 int err;
>
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 5c3fdb29c1b1..6624b2465484 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -23,6 +23,7 @@
>  #include <linux/btf_ids.h>
>  #include <linux/bpf_mem_alloc.h>
>  #include <linux/kasan.h>
> +#include <asm/vdso/gettimeofday.h>
>
>  #include "../../lib/kstrtox.h"
>
> @@ -3023,6 +3024,11 @@ __bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user
>         return ret + 1;
>  }
>
> +__bpf_kfunc int bpf_get_hw_counter(void)
> +{
> +       return __arch_get_hw_counter(1, NULL);
> +}
> +
>  __bpf_kfunc_end_defs();
>
>  BTF_KFUNCS_START(generic_btf_ids)
> @@ -3112,6 +3118,7 @@ BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
>  BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
>  BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
>  BTF_ID_FLAGS(func, bpf_get_kmem_cache)
> +BTF_ID_FLAGS(func, bpf_get_hw_counter, KF_FASTCALL)
>  BTF_KFUNCS_END(common_btf_ids)
>
>  static const struct btf_kfunc_id_set common_kfunc_set = {
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index f514247ba8ba..5f0e4f91ce48 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -11260,6 +11260,7 @@ enum special_kfunc_type {
>         KF_bpf_iter_css_task_new,
>         KF_bpf_session_cookie,
>         KF_bpf_get_kmem_cache,
> +       KF_bpf_get_hw_counter,
>  };
>
>  BTF_SET_START(special_kfunc_set)
> @@ -11326,6 +11327,7 @@ BTF_ID(func, bpf_session_cookie)
>  BTF_ID_UNUSED
>  #endif
>  BTF_ID(func, bpf_get_kmem_cache)
> +BTF_ID(func, bpf_get_hw_counter)
>
>  static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
>  {
> @@ -20396,6 +20398,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>                    desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
>                 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
>                 *cnt = 1;
> +       } else if (IS_ENABLED(CONFIG_X86) &&

It's better to introduce bpf_jit_inlines_kfunc_call()
similar to bpf_jit_inlines_helper_call().

> +                  desc->func_id == special_kfunc_list[KF_bpf_get_hw_counter]) {
> +               insn->imm = 0;
> +               insn->code = BPF_JMP | BPF_CALL;
> +               insn->src_reg = BPF_PSEUDO_KFUNC_CALL;
> +               insn->dst_reg = 1; /* Implement enum for inlined fast calls */

Yes. Pls do it cleanly from the start.

Why rewrite though?
Can JIT match the addr of bpf_get_hw_counter ?
And no need to rewrite call insn ?

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
  2024-10-24  1:39 ` [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc Alexei Starovoitov
@ 2024-10-24 13:11   ` Vadim Fedorenko
  2024-10-24 17:03     ` Alexei Starovoitov
  2024-10-24 17:31     ` Eduard Zingerman
  0 siblings, 2 replies; 10+ messages in thread
From: Vadim Fedorenko @ 2024-10-24 13:11 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Eduard Zingerman, Thomas Gleixner, X86 ML, bpf

On 24/10/2024 02:39, Alexei Starovoitov wrote:
> On Wed, Oct 23, 2024 at 2:05 PM Vadim Fedorenko <vadfed@meta.com> wrote:
>>
>> New kfunc to return ARCH-specific timecounter. For x86 BPF JIT converts
>> it into rdtsc ordered call. Other architectures will get JIT
>> implementation too if supported. The fallback is to
>> __arch_get_hw_counter().
> 
> arch_get_hw_counter is a great idea.
> 
>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>> ---
>>   arch/x86/net/bpf_jit_comp.c   | 23 +++++++++++++++++++++++
>>   arch/x86/net/bpf_jit_comp32.c | 11 +++++++++++
>>   kernel/bpf/helpers.c          |  7 +++++++
>>   kernel/bpf/verifier.c         | 11 +++++++++++
>>   4 files changed, 52 insertions(+)
>>
>> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
>> index 06b080b61aa5..55595a0fa55b 100644
>> --- a/arch/x86/net/bpf_jit_comp.c
>> +++ b/arch/x86/net/bpf_jit_comp.c
>> @@ -2126,6 +2126,29 @@ st:                      if (is_imm8(insn->off))
>>                  case BPF_JMP | BPF_CALL: {
>>                          u8 *ip = image + addrs[i - 1];
>>
>> +                       if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) {
>> +                               if (insn->dst_reg == 1) {
>> +                                       struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id());
>> +
>> +                                       /* Save RDX because RDTSC will use EDX:EAX to return u64 */
>> +                                       emit_mov_reg(&prog, true, AUX_REG, BPF_REG_3);
>> +                                       if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC))
>> +                                               EMIT_LFENCE();
>> +                                       EMIT2(0x0F, 0x31);
>> +
>> +                                       /* shl RDX, 32 */
>> +                                       maybe_emit_1mod(&prog, BPF_REG_3, true);
>> +                                       EMIT3(0xC1, add_1reg(0xE0, BPF_REG_3), 32);
>> +                                       /* or RAX, RDX */
>> +                                       maybe_emit_mod(&prog, BPF_REG_0, BPF_REG_3, true);
>> +                                       EMIT2(0x09, add_2reg(0xC0, BPF_REG_0, BPF_REG_3));
>> +                                       /* restore RDX from R11 */
>> +                                       emit_mov_reg(&prog, true, BPF_REG_3, AUX_REG);
> 
> This doesn't match
> static inline u64 __arch_get_hw_counter(s32 clock_mode,
>                                          const struct vdso_data *vd)
> {
>          if (likely(clock_mode == VDSO_CLOCKMODE_TSC))
>                  return (u64)rdtsc_ordered() & S64_MAX;
> 
> - & is missing

& S64_MAX is only needed to detect early a possible wrap-around of the
timecounter in case of a vDSO call for CLOCK_MONOTONIC_RAW/CLOCK_COARSE,
which adds the namespace time offset. The TSC is reset during CPU reset
and will not overflow within 10 years according to the "Intel 64 and
IA-32 Architectures Software Developer's Manual, Vol 3B", so it doesn't
really matter whether we mask the highest bit or not while accessing the
raw cycle counter.

> - rdtsc vs rdtscp

rdtscp provides an additional 32 bits of "signature value" atomically
with the TSC value in ECX. This value is not really usable outside of
the domain which previously set it during initialization. The kernel
stores the encoded cpuid into IA32_TSC_AUX to provide it back to the
user-space application, but at the same time ignores its value during
read. The combination of lfence and rdtsc will give us the same result
(an ordered read of the TSC value, independent of the core) without
trashing the ECX value.

> but the later one is much slower (I was told).

It is slower on AMD CPUs for now, easily visible in perf traces under load.

> So maybe instead of arch_get_hw_counter() it should be modelled
> as JIT of sched_clock() ?

sched_clock() is much more complicated because it converts cycles
counter into ns, we don't actually need this conversion, let's stick
to arch_get_hw_counter().

> 
>> +
>> +                                       break;
>> +                               }
>> +                       }
>> +
>>                          func = (u8 *) __bpf_call_base + imm32;
>>                          if (tail_call_reachable) {
>>                                  LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth);
>> diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
>> index de0f9e5f9f73..c36ff18a044b 100644
>> --- a/arch/x86/net/bpf_jit_comp32.c
>> +++ b/arch/x86/net/bpf_jit_comp32.c
>> @@ -2091,6 +2091,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
>>                          if (insn->src_reg == BPF_PSEUDO_CALL)
>>                                  goto notyet;
>>
>> +                       if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) {
>> +                               if (insn->dst_reg == 1) {
>> +                                       struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id());
>> +
>> +                                       if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC))
>> +                                               EMIT3(0x0F, 0xAE, 0xE8);
>> +                                       EMIT2(0x0F, 0x31);
>> +                                       break;
>> +                               }
>> +                       }
>> +
>>                          if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
>>                                  int err;
>>
>> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
>> index 5c3fdb29c1b1..6624b2465484 100644
>> --- a/kernel/bpf/helpers.c
>> +++ b/kernel/bpf/helpers.c
>> @@ -23,6 +23,7 @@
>>   #include <linux/btf_ids.h>
>>   #include <linux/bpf_mem_alloc.h>
>>   #include <linux/kasan.h>
>> +#include <asm/vdso/gettimeofday.h>
>>
>>   #include "../../lib/kstrtox.h"
>>
>> @@ -3023,6 +3024,11 @@ __bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user
>>          return ret + 1;
>>   }
>>
>> +__bpf_kfunc int bpf_get_hw_counter(void)
>> +{
>> +       return __arch_get_hw_counter(1, NULL);
>> +}
>> +
>>   __bpf_kfunc_end_defs();
>>
>>   BTF_KFUNCS_START(generic_btf_ids)
>> @@ -3112,6 +3118,7 @@ BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
>>   BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
>>   BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
>>   BTF_ID_FLAGS(func, bpf_get_kmem_cache)
>> +BTF_ID_FLAGS(func, bpf_get_hw_counter, KF_FASTCALL)
>>   BTF_KFUNCS_END(common_btf_ids)
>>
>>   static const struct btf_kfunc_id_set common_kfunc_set = {
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index f514247ba8ba..5f0e4f91ce48 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -11260,6 +11260,7 @@ enum special_kfunc_type {
>>          KF_bpf_iter_css_task_new,
>>          KF_bpf_session_cookie,
>>          KF_bpf_get_kmem_cache,
>> +       KF_bpf_get_hw_counter,
>>   };
>>
>>   BTF_SET_START(special_kfunc_set)
>> @@ -11326,6 +11327,7 @@ BTF_ID(func, bpf_session_cookie)
>>   BTF_ID_UNUSED
>>   #endif
>>   BTF_ID(func, bpf_get_kmem_cache)
>> +BTF_ID(func, bpf_get_hw_counter)
>>
>>   static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
>>   {
>> @@ -20396,6 +20398,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>>                     desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
>>                  insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
>>                  *cnt = 1;
>> +       } else if (IS_ENABLED(CONFIG_X86) &&
> 
> It's better to introduce bpf_jit_inlines_kfunc_call()
> similar to bpf_jit_inlines_helper_call().

Yep, I thought about introducing it while adding more architectures, but
can do it from the beginning.

> 
>> +                  desc->func_id == special_kfunc_list[KF_bpf_get_hw_counter]) {
>> +               insn->imm = 0;
>> +               insn->code = BPF_JMP | BPF_CALL;
>> +               insn->src_reg = BPF_PSEUDO_KFUNC_CALL;
>> +               insn->dst_reg = 1; /* Implement enum for inlined fast calls */
> 
> Yes. Pls do it cleanly from the start.
> 
> Why rewrite though?
> Can JIT match the addr of bpf_get_hw_counter ?
> And no need to rewrite call insn ?

I was thinking about this way, just wasn't able to find easy examples of
matching function addresses in jit. I'll try to make it but it may add
some extra functions to the jit.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
  2024-10-23 21:04 [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc Vadim Fedorenko
  2024-10-23 21:04 ` [PATCH bpf-next 2/2] selftests/bpf: add selftest to check rdtsc jit Vadim Fedorenko
  2024-10-24  1:39 ` [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc Alexei Starovoitov
@ 2024-10-24 16:03 ` kernel test robot
  2024-10-24 16:13 ` kernel test robot
  2024-10-24 16:13 ` kernel test robot
  4 siblings, 0 replies; 10+ messages in thread
From: kernel test robot @ 2024-10-24 16:03 UTC (permalink / raw)
  To: Vadim Fedorenko, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Eduard Zingerman, Thomas Gleixner,
	Vadim Fedorenko
  Cc: oe-kbuild-all, x86, bpf

Hi Vadim,

kernel test robot noticed the following build warnings:

[auto build test WARNING on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/selftests-bpf-add-selftest-to-check-rdtsc-jit/20241024-050747
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/20241023210437.2266063-1-vadfed%40meta.com
patch subject: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
config: powerpc-allmodconfig (https://download.01.org/0day-ci/archive/20241024/202410242353.krjd8d6t-lkp@intel.com/config)
compiler: powerpc64-linux-gcc (GCC) 14.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241024/202410242353.krjd8d6t-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410242353.krjd8d6t-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from kernel/bpf/helpers.c:26:
>> arch/powerpc/include/asm/vdso/gettimeofday.h:97:63: warning: 'struct vdso_data' declared inside parameter list will not be visible outside of this definition or declaration
      97 |                                                  const struct vdso_data *vd)
         |                                                               ^~~~~~~~~


vim +97 arch/powerpc/include/asm/vdso/gettimeofday.h

ce7d8056e38b77 Christophe Leroy 2020-11-27   95  
ce7d8056e38b77 Christophe Leroy 2020-11-27   96  static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
ce7d8056e38b77 Christophe Leroy 2020-11-27  @97  						 const struct vdso_data *vd)
ce7d8056e38b77 Christophe Leroy 2020-11-27   98  {
ce7d8056e38b77 Christophe Leroy 2020-11-27   99  	return get_tb();
ce7d8056e38b77 Christophe Leroy 2020-11-27  100  }
ce7d8056e38b77 Christophe Leroy 2020-11-27  101  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
  2024-10-23 21:04 [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc Vadim Fedorenko
                   ` (2 preceding siblings ...)
  2024-10-24 16:03 ` kernel test robot
@ 2024-10-24 16:13 ` kernel test robot
  2024-10-24 16:13 ` kernel test robot
  4 siblings, 0 replies; 10+ messages in thread
From: kernel test robot @ 2024-10-24 16:13 UTC (permalink / raw)
  To: Vadim Fedorenko, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Eduard Zingerman, Thomas Gleixner,
	Vadim Fedorenko
  Cc: llvm, oe-kbuild-all, x86, bpf

Hi Vadim,

kernel test robot noticed the following build errors:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/selftests-bpf-add-selftest-to-check-rdtsc-jit/20241024-050747
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/20241023210437.2266063-1-vadfed%40meta.com
patch subject: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
config: arm64-randconfig-001-20241024 (https://download.01.org/0day-ci/archive/20241024/202410242310.od2UFxiK-lkp@intel.com/config)
compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 5886454669c3c9026f7f27eab13509dd0241f2d6)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241024/202410242310.od2UFxiK-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410242310.od2UFxiK-lkp@intel.com/

All error/warnings (new ones prefixed by >>):

   In file included from kernel/bpf/helpers.c:4:
   In file included from include/linux/bpf.h:21:
   In file included from include/linux/kallsyms.h:13:
   In file included from include/linux/mm.h:2213:
   include/linux/vmstat.h:504:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     504 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     505 |                            item];
         |                            ~~~~
   include/linux/vmstat.h:511:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     511 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     512 |                            NR_VM_NUMA_EVENT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~~
   include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     518 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   In file included from kernel/bpf/helpers.c:26:
>> arch/arm64/include/asm/vdso/gettimeofday.h:70:21: warning: declaration of 'struct vdso_data' will not be visible outside of this function [-Wvisibility]
      70 |                                                  const struct vdso_data *vd)
         |                                                               ^
>> arch/arm64/include/asm/vdso/gettimeofday.h:79:20: error: use of undeclared identifier 'VDSO_CLOCKMODE_NONE'
      79 |         if (clock_mode == VDSO_CLOCKMODE_NONE)
         |                           ^
>> arch/arm64/include/asm/vdso/gettimeofday.h:105:9: error: use of undeclared identifier '_vdso_data'
     105 |         return _vdso_data;
         |                ^
   kernel/bpf/helpers.c:115:36: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     115 |         .arg2_type      = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
         |                           ~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:128:36: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     128 |         .arg2_type      = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
         |                           ~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:539:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     539 |         .arg1_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:542:41: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     542 |         .arg4_type      = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:567:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     567 |         .arg1_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:570:41: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     570 |         .arg4_type      = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:583:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     583 |         .arg1_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:653:35: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     653 |         .arg4_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:725:39: warning: bitwise operation between different enumeration types ('enum bpf_return_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     725 |         .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~
   kernel/bpf/helpers.c:738:39: warning: bitwise operation between different enumeration types ('enum bpf_return_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     738 |         .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1080:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1080 |         .arg4_type      = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~
   kernel/bpf/helpers.c:1641:44: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1641 |         .arg2_type    = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
         |                         ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~
   kernel/bpf/helpers.c:1746:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1746 |         .arg4_type      = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~
   kernel/bpf/helpers.c:1789:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1789 |         .arg3_type      = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1837:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1837 |         .arg1_type      = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1839:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1839 |         .arg3_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1879:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1879 |         .arg1_type      = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   21 warnings and 2 errors generated.


vim +/VDSO_CLOCKMODE_NONE +79 arch/arm64/include/asm/vdso/gettimeofday.h

28b1a824a4f44d Vincenzo Frascino 2019-06-21   68  
4c5a116ada953b Thomas Gleixner   2020-08-04   69  static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
4c5a116ada953b Thomas Gleixner   2020-08-04  @70  						 const struct vdso_data *vd)
28b1a824a4f44d Vincenzo Frascino 2019-06-21   71  {
28b1a824a4f44d Vincenzo Frascino 2019-06-21   72  	u64 res;
28b1a824a4f44d Vincenzo Frascino 2019-06-21   73  
27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25   74  	/*
5e3c6a312a0946 Thomas Gleixner   2020-02-07   75  	 * Core checks for mode already, so this raced against a concurrent
5e3c6a312a0946 Thomas Gleixner   2020-02-07   76  	 * update. Return something. Core will do another round and then
5e3c6a312a0946 Thomas Gleixner   2020-02-07   77  	 * see the mode change and fallback to the syscall.
27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25   78  	 */
5e3c6a312a0946 Thomas Gleixner   2020-02-07  @79  	if (clock_mode == VDSO_CLOCKMODE_NONE)
5e3c6a312a0946 Thomas Gleixner   2020-02-07   80  		return 0;
27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25   81  
27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25   82  	/*
9025cebf12d176 Joey Gouly        2022-08-30   83  	 * If FEAT_ECV is available, use the self-synchronizing counter.
9025cebf12d176 Joey Gouly        2022-08-30   84  	 * Otherwise the isb is required to prevent that the counter value
27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25   85  	 * is speculated.
27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25   86  	*/
9025cebf12d176 Joey Gouly        2022-08-30   87  	asm volatile(
9025cebf12d176 Joey Gouly        2022-08-30   88  	ALTERNATIVE("isb\n"
9025cebf12d176 Joey Gouly        2022-08-30   89  		    "mrs %0, cntvct_el0",
9025cebf12d176 Joey Gouly        2022-08-30   90  		    "nop\n"
9025cebf12d176 Joey Gouly        2022-08-30   91  		    __mrs_s("%0", SYS_CNTVCTSS_EL0),
9025cebf12d176 Joey Gouly        2022-08-30   92  		    ARM64_HAS_ECV)
9025cebf12d176 Joey Gouly        2022-08-30   93  	: "=r" (res)
9025cebf12d176 Joey Gouly        2022-08-30   94  	:
9025cebf12d176 Joey Gouly        2022-08-30   95  	: "memory");
9025cebf12d176 Joey Gouly        2022-08-30   96  
77ec462536a13d Will Deacon       2021-03-18   97  	arch_counter_enforce_ordering(res);
28b1a824a4f44d Vincenzo Frascino 2019-06-21   98  
28b1a824a4f44d Vincenzo Frascino 2019-06-21   99  	return res;
28b1a824a4f44d Vincenzo Frascino 2019-06-21  100  }
28b1a824a4f44d Vincenzo Frascino 2019-06-21  101  
28b1a824a4f44d Vincenzo Frascino 2019-06-21  102  static __always_inline
28b1a824a4f44d Vincenzo Frascino 2019-06-21  103  const struct vdso_data *__arch_get_vdso_data(void)
28b1a824a4f44d Vincenzo Frascino 2019-06-21  104  {
28b1a824a4f44d Vincenzo Frascino 2019-06-21 @105  	return _vdso_data;
28b1a824a4f44d Vincenzo Frascino 2019-06-21  106  }
28b1a824a4f44d Vincenzo Frascino 2019-06-21  107  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
  2024-10-23 21:04 [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc Vadim Fedorenko
                   ` (3 preceding siblings ...)
  2024-10-24 16:13 ` kernel test robot
@ 2024-10-24 16:13 ` kernel test robot
  4 siblings, 0 replies; 10+ messages in thread
From: kernel test robot @ 2024-10-24 16:13 UTC (permalink / raw)
  To: Vadim Fedorenko, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Eduard Zingerman, Thomas Gleixner,
	Vadim Fedorenko
  Cc: llvm, oe-kbuild-all, x86, bpf

Hi Vadim,

kernel test robot noticed the following build errors:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/selftests-bpf-add-selftest-to-check-rdtsc-jit/20241024-050747
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/20241023210437.2266063-1-vadfed%40meta.com
patch subject: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
config: riscv-defconfig (https://download.01.org/0day-ci/archive/20241024/202410242317.RqcJ3H1k-lkp@intel.com/config)
compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 5886454669c3c9026f7f27eab13509dd0241f2d6)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241024/202410242317.RqcJ3H1k-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410242317.RqcJ3H1k-lkp@intel.com/

All error/warnings (new ones prefixed by >>):

   In file included from kernel/bpf/helpers.c:4:
   In file included from include/linux/bpf.h:21:
   In file included from include/linux/kallsyms.h:13:
   In file included from include/linux/mm.h:2213:
   include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     518 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   In file included from kernel/bpf/helpers.c:26:
>> arch/riscv/include/asm/vdso/gettimeofday.h:72:21: warning: declaration of 'struct vdso_data' will not be visible outside of this function [-Wvisibility]
      72 |                                                  const struct vdso_data *vd)
         |                                                               ^
>> arch/riscv/include/asm/vdso/gettimeofday.h:84:9: error: use of undeclared identifier '_vdso_data'
      84 |         return _vdso_data;
         |                ^
>> arch/riscv/include/asm/vdso/gettimeofday.h:91:9: error: use of undeclared identifier '_timens_data'
      91 |         return _timens_data;
         |                ^
   kernel/bpf/helpers.c:115:36: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     115 |         .arg2_type      = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
         |                           ~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:128:36: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     128 |         .arg2_type      = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
         |                           ~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:539:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     539 |         .arg1_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:542:41: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     542 |         .arg4_type      = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:567:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     567 |         .arg1_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:570:41: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     570 |         .arg4_type      = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:583:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     583 |         .arg1_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:653:35: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     653 |         .arg4_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:725:39: warning: bitwise operation between different enumeration types ('enum bpf_return_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     725 |         .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~
   kernel/bpf/helpers.c:738:39: warning: bitwise operation between different enumeration types ('enum bpf_return_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     738 |         .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1080:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1080 |         .arg4_type      = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~
   kernel/bpf/helpers.c:1641:44: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1641 |         .arg2_type    = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
         |                         ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~
   kernel/bpf/helpers.c:1746:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1746 |         .arg4_type      = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~
   kernel/bpf/helpers.c:1789:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1789 |         .arg3_type      = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1837:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1837 |         .arg1_type      = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1839:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1839 |         .arg3_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1879:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1879 |         .arg1_type      = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   19 warnings and 2 errors generated.


vim +/_vdso_data +84 arch/riscv/include/asm/vdso/gettimeofday.h

aa5af0aa90bad3 Evan Green      2023-04-07  70  
4c5a116ada953b Thomas Gleixner 2020-08-04  71  static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
4c5a116ada953b Thomas Gleixner 2020-08-04 @72  						 const struct vdso_data *vd)
ad5d1122b82fbd Vincent Chen    2020-06-09  73  {
ad5d1122b82fbd Vincent Chen    2020-06-09  74  	/*
ad5d1122b82fbd Vincent Chen    2020-06-09  75  	 * The purpose of csr_read(CSR_TIME) is to trap the system into
ad5d1122b82fbd Vincent Chen    2020-06-09  76  	 * M-mode to obtain the value of CSR_TIME. Hence, unlike other
ad5d1122b82fbd Vincent Chen    2020-06-09  77  	 * architecture, no fence instructions surround the csr_read()
ad5d1122b82fbd Vincent Chen    2020-06-09  78  	 */
ad5d1122b82fbd Vincent Chen    2020-06-09  79  	return csr_read(CSR_TIME);
ad5d1122b82fbd Vincent Chen    2020-06-09  80  }
ad5d1122b82fbd Vincent Chen    2020-06-09  81  
ad5d1122b82fbd Vincent Chen    2020-06-09  82  static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
ad5d1122b82fbd Vincent Chen    2020-06-09  83  {
ad5d1122b82fbd Vincent Chen    2020-06-09 @84  	return _vdso_data;
ad5d1122b82fbd Vincent Chen    2020-06-09  85  }
ad5d1122b82fbd Vincent Chen    2020-06-09  86  
dffe11e280a42c Tong Tiangen    2021-09-01  87  #ifdef CONFIG_TIME_NS
dffe11e280a42c Tong Tiangen    2021-09-01  88  static __always_inline
dffe11e280a42c Tong Tiangen    2021-09-01  89  const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
dffe11e280a42c Tong Tiangen    2021-09-01  90  {
dffe11e280a42c Tong Tiangen    2021-09-01 @91  	return _timens_data;
dffe11e280a42c Tong Tiangen    2021-09-01  92  }
dffe11e280a42c Tong Tiangen    2021-09-01  93  #endif
ad5d1122b82fbd Vincent Chen    2020-06-09  94  #endif /* !__ASSEMBLY__ */
ad5d1122b82fbd Vincent Chen    2020-06-09  95  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
  2024-10-24 13:11   ` Vadim Fedorenko
@ 2024-10-24 17:03     ` Alexei Starovoitov
  2024-10-24 17:31     ` Eduard Zingerman
  1 sibling, 0 replies; 10+ messages in thread
From: Alexei Starovoitov @ 2024-10-24 17:03 UTC (permalink / raw)
  To: Vadim Fedorenko
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Eduard Zingerman, Thomas Gleixner, X86 ML, bpf

On Thu, Oct 24, 2024 at 6:11 AM Vadim Fedorenko
<vadim.fedorenko@linux.dev> wrote:
>
> > static inline u64 __arch_get_hw_counter(s32 clock_mode,
> >                                          const struct vdso_data *vd)
> > {
> >          if (likely(clock_mode == VDSO_CLOCKMODE_TSC))
> >                  return (u64)rdtsc_ordered() & S64_MAX;
> >
> > - & is missing
>
> & S64_MAX is only needed to detect a possible wrap-around of the
> timecounter early in case of a vDSO call for CLOCK_MONOTONIC_RAW/CLOCK_COARSE
> which adds the namespace time offset. TSC is reset during CPU reset and will
> not overflow within 10 years according to "Intel 64 and IA-32
> Architecture Software Developer's Manual, Vol 3B", so it doesn't really
> matter whether we mask the highest bit or not while accessing the raw
> cycles counter.
>
> > - rdtsc vs rdtscp
>
> rdtscp provides an additional 32 bits of "signature value" atomically with
> the TSC value, in ECX. This value is not really usable outside of the domain
> which set it previously during initialization. The kernel stores an encoded
> cpuid into IA32_TSC_AUX to provide it back to the user-space application,
> but at the same time ignores its value during reads. The combination of
> lfence and rdtsc will give us the same result (an ordered read of the TSC
> value independent of the core) without trashing the ECX value.

That makes sense.

One more thing:

> __bpf_kfunc int bpf_get_hw_counter(void)

it should be returning u64

> >> +               insn->imm = 0;
> >> +               insn->code = BPF_JMP | BPF_CALL;
> >> +               insn->src_reg = BPF_PSEUDO_KFUNC_CALL;
> >> +               insn->dst_reg = 1; /* Implement enum for inlined fast calls */
> >
> > Yes. Pls do it cleanly from the start.
> >
> > Why rewrite though?
> > Can JIT match the addr of bpf_get_hw_counter ?
> > And no need to rewrite call insn ?
>
> I was thinking about this way, just wasn't able to find easy examples of
> matching function addresses in jit. I'll try to make it but it may add
> some extra functions to the jit.

func = (u8 *) __bpf_call_base + imm32;
if (func == &bpf_get_hw_counter)

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
  2024-10-24 13:11   ` Vadim Fedorenko
  2024-10-24 17:03     ` Alexei Starovoitov
@ 2024-10-24 17:31     ` Eduard Zingerman
  2024-10-24 17:47       ` Eduard Zingerman
  1 sibling, 1 reply; 10+ messages in thread
From: Eduard Zingerman @ 2024-10-24 17:31 UTC (permalink / raw)
  To: Vadim Fedorenko, Alexei Starovoitov
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Thomas Gleixner, X86 ML, bpf

On Thu, 2024-10-24 at 14:11 +0100, Vadim Fedorenko wrote:

[...]

> > > @@ -20396,6 +20398,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> > >                     desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
> > >                  insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
> > >                  *cnt = 1;
> > > +       } else if (IS_ENABLED(CONFIG_X86) &&
> > 
> > It's better to introduce bpf_jit_inlines_kfunc_call()
> > similar to bpf_jit_inlines_helper_call().
> 
> Yep, I thought about introducing it while adding more architectures, but
> can do it from the beginning.

After thinking a bit more, I think I messed up in a private discussion
with Vadim. It is necessary to introduce bpf_jit_inlines_kfunc_call()
and use it in the mark_fastcall_pattern_for_call(),
otherwise the following situation is possible:

- the program is executed on the arch where inlining for
  bpf_get_hw_counter() is not implemented;
- there is a pattern in the code:

    *(u64 *)(r10 - 256) = r1;
    call bpf_get_hw_fastcall
    r1 = *(u64 *)(r10 - 256);
    ... r10 - 8 is not used ...
    ... r1 is read ...

- mark_fastcall_pattern_for_call() would mark spill and fill as
  members of the pattern;
- fastcall contract is not violated, because reserved stack slots are
  used as expected;
- remove_fastcall_spills_fills() would remove spill and fill:

    call bpf_get_hw_fastcall
    ... r1 is read ...

- since call is not transformed to instructions by a specific jit the
  value of r1 would be clobbered, making the program invalid.
  
Vadim, sorry I did not point this out earlier, I thought that fastcall
contract ensuring logic would handle everything.

[...]


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
  2024-10-24 17:31     ` Eduard Zingerman
@ 2024-10-24 17:47       ` Eduard Zingerman
  0 siblings, 0 replies; 10+ messages in thread
From: Eduard Zingerman @ 2024-10-24 17:47 UTC (permalink / raw)
  To: Vadim Fedorenko, Alexei Starovoitov
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Thomas Gleixner, X86 ML, bpf

On Thu, 2024-10-24 at 10:31 -0700, Eduard Zingerman wrote:

[...]

>     ... r10 - 8 is not used ...

I mean r10 - 256.

[...]


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2024-10-24 17:47 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-10-23 21:04 [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc Vadim Fedorenko
2024-10-23 21:04 ` [PATCH bpf-next 2/2] selftests/bpf: add selftest to check rdtsc jit Vadim Fedorenko
2024-10-24  1:39 ` [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc Alexei Starovoitov
2024-10-24 13:11   ` Vadim Fedorenko
2024-10-24 17:03     ` Alexei Starovoitov
2024-10-24 17:31     ` Eduard Zingerman
2024-10-24 17:47       ` Eduard Zingerman
2024-10-24 16:03 ` kernel test robot
2024-10-24 16:13 ` kernel test robot
2024-10-24 16:13 ` kernel test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox