linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH bpf-next v2 0/2] bpf, arm64: Support per-cpu instructions
@ 2024-04-24 17:35 Puranjay Mohan
  2024-04-24 17:35 ` [PATCH bpf-next v2 1/2] arm64, bpf: add internal-only MOV instruction to resolve per-CPU addrs Puranjay Mohan
  2024-04-24 17:35 ` [PATCH bpf-next v2 2/2] bpf, arm64: inline bpf_get_smp_processor_id() helper Puranjay Mohan
  0 siblings, 2 replies; 9+ messages in thread
From: Puranjay Mohan @ 2024-04-24 17:35 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, Jiri Olsa, Zi Shen Lim, Xu Kuohai, Florent Revest,
	linux-arm-kernel, linux-kernel, bpf
  Cc: puranjay12

Changes in v1 -> v2:
v1: https://lore.kernel.org/all/20240405091707.66675-1-puranjay12@gmail.com/
- Add a patch to inline bpf_get_smp_processor_id()
- Fix an issue in MRS instruction encoding as pointed out by Will
- Remove CONFIG_SMP check

This series adds support for internal-only per-CPU instructions and
inlines the bpf_get_smp_processor_id() helper for the ARM64 BPF JIT.

Here is an example of bpf_get_smp_processor_id() and percpu_array_map_lookup_elem()
before and after this series.

                                         BPF
                                        =====
              BEFORE                                       AFTER
             --------                                     -------

int cpu = bpf_get_smp_processor_id();           int cpu = bpf_get_smp_processor_id();
(85) call bpf_get_smp_processor_id#229032       (18) r0 = 0xffff800082072008
                                                (bf) r0 = r0
                                                (61) r0 = *(u32 *)(r0 +0)


p = bpf_map_lookup_elem(map, &zero);            p = bpf_map_lookup_elem(map, &zero);
(18) r1 = map[id:78]                            (18) r1 = map[id:153]
(18) r2 = map[id:82][0]+65536                   (18) r2 = map[id:157][0]+65536
(85) call percpu_array_map_lookup_elem#313512   (07) r1 += 496
                                                (61) r0 = *(u32 *)(r2 +0)
                                                (35) if r0 >= 0x1 goto pc+5
                                                (67) r0 <<= 3
                                                (0f) r0 += r1
                                                (79) r0 = *(u64 *)(r0 +0)
                                                (bf) r0 = r0
                                                (05) goto pc+1
                                                (b7) r0 = 0


                                      ARM64 JIT
                                     ===========

              BEFORE                                       AFTER
             --------                                     -------

int cpu = bpf_get_smp_processor_id();      int cpu = bpf_get_smp_processor_id();
mov     x10, #0xfffffffffffff4d0           mov     x7, #0xffff8000ffffffff
movk    x10, #0x802b, lsl #16              movk    x7, #0x8207, lsl #16
movk    x10, #0x8000, lsl #32              movk    x7, #0x2008
blr     x10                                mrs     x10, tpidr_el1
add     x7, x0, #0x0                       add     x7, x7, x10
                                           ldr     w7, [x7]


p = bpf_map_lookup_elem(map, &zero);       p = bpf_map_lookup_elem(map, &zero);
mov     x0, #0xffff0003ffffffff            mov     x0, #0xffff0003ffffffff
movk    x0, #0xce5c, lsl #16               movk    x0, #0xe0f3, lsl #16
movk    x0, #0xca00                        movk    x0, #0x7c00
mov     x1, #0xffff8000ffffffff            mov     x1, #0xffff8000ffffffff
movk    x1, #0x8bdb, lsl #16               movk    x1, #0xb0c7, lsl #16
movk    x1, #0x6000                        movk    x1, #0xe000
mov     x10, #0xffffffffffff3ed0           add     x0, x0, #0x1f0
movk    x10, #0x802d, lsl #16              ldr     w7, [x1]
movk    x10, #0x8000, lsl #32              cmp     x7, #0x1
blr     x10                                b.cs    0x0000000000000090
add     x7, x0, #0x0                       lsl     x7, x7, #3
                                           add     x7, x7, x0
                                           ldr     x7, [x7]
                                           mrs     x10, tpidr_el1
                                           add     x7, x7, x10
                                           b       0x0000000000000094
                                           mov     x7, #0x0

              Performance improvement found using benchmark[1]

             BEFORE                                       AFTER
            --------                                     -------

glob-arr-inc   :   23.817 ± 0.019M/s      glob-arr-inc   :   24.631 ± 0.027M/s
arr-inc        :   23.253 ± 0.019M/s      arr-inc        :   23.742 ± 0.023M/s
hash-inc       :   12.258 ± 0.010M/s      hash-inc       :   12.625 ± 0.004M/s

[1] https://github.com/anakryiko/linux/commit/8dec900975ef

Puranjay Mohan (2):
  arm64, bpf: add internal-only MOV instruction to resolve per-CPU addrs
  bpf, arm64: inline bpf_get_smp_processor_id() helper

 arch/arm64/include/asm/insn.h |  7 +++++++
 arch/arm64/lib/insn.c         | 11 +++++++++++
 arch/arm64/net/bpf_jit.h      |  6 ++++++
 arch/arm64/net/bpf_jit_comp.c | 14 ++++++++++++++
 kernel/bpf/verifier.c         | 11 ++++++++++-
 5 files changed, 48 insertions(+), 1 deletion(-)

-- 
2.40.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-04-26 10:26 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-24 17:35 [PATCH bpf-next v2 0/2] bpf, arm64: Support per-cpu instructions Puranjay Mohan
2024-04-24 17:35 ` [PATCH bpf-next v2 1/2] arm64, bpf: add internal-only MOV instruction to resolve per-CPU addrs Puranjay Mohan
2024-04-24 17:35 ` [PATCH bpf-next v2 2/2] bpf, arm64: inline bpf_get_smp_processor_id() helper Puranjay Mohan
2024-04-24 22:01   ` Andrii Nakryiko
2024-04-25 10:14     ` Puranjay Mohan
2024-04-25 18:09       ` Andrii Nakryiko
2024-04-25 18:55         ` Puranjay Mohan
2024-04-25 20:43           ` Andrii Nakryiko
2024-04-26 10:26             ` Puranjay Mohan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).