From: Andrii Nakryiko <andrii@kernel.org>
To: bpf@vger.kernel.org, ast@kernel.org, daniel@iogearbox.net,
martin.lau@kernel.org
Cc: andrii@kernel.org, kernel-team@meta.com
Subject: [PATCH v2 bpf-next 1/4] bpf: add special internal-only MOV instruction to resolve per-CPU addrs
Date: Mon, 1 Apr 2024 19:13:02 -0700 [thread overview]
Message-ID: <20240402021307.1012571-2-andrii@kernel.org> (raw)
In-Reply-To: <20240402021307.1012571-1-andrii@kernel.org>
Add a new BPF instruction for resolving absolute addresses of per-CPU
data from their per-CPU offsets. This instruction is internal-only and
users are not allowed to use them directly. They will only be used for
internal inlining optimizations for now between BPF verifier and BPF JITs.
We use a special BPF_MOV | BPF_ALU64 | BPF_X form with insn->off field
set to BPF_ADDR_PERCPU = -1. I used negative offset value to distinguish
them from positive ones used by user-exposed instructions.
Such instruction performs a resolution of a per-CPU offset stored in
a register to a valid kernel address which can be dereferenced. It is
useful in any use case where absolute address of a per-CPU data has to
be resolved (e.g., in inlining bpf_map_lookup_elem()).
BPF disassembler is also taught to recognize them to support dumping
final BPF assembly code (non-JIT'ed version).
Add arch-specific way for BPF JITs to mark support for this instructions.
This patch also adds support for these instructions in x86-64 BPF JIT.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
arch/x86/net/bpf_jit_comp.c | 16 ++++++++++++++++
include/linux/filter.h | 20 ++++++++++++++++++++
kernel/bpf/core.c | 5 +++++
kernel/bpf/disasm.c | 14 ++++++++++++++
4 files changed, 55 insertions(+)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 3b639d6f2f54..af89dd117dce 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1382,6 +1382,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
maybe_emit_mod(&prog, AUX_REG, dst_reg, true);
EMIT3(0x0F, 0x44, add_2reg(0xC0, AUX_REG, dst_reg));
break;
+ } else if (insn_is_mov_percpu_addr(insn)) {
+ u32 off = (u32)(unsigned long)&this_cpu_off;
+
+ /* mov <dst>, <src> (if necessary) */
+ EMIT_mov(dst_reg, src_reg);
+
+ /* add <dst>, gs:[<off>] */
+ EMIT2(0x65, add_1mod(0x48, dst_reg));
+ EMIT3(0x03, add_1reg(0x04, dst_reg), 0x25);
+ EMIT(off, 4);
+ break;
}
fallthrough;
case BPF_ALU | BPF_MOV | BPF_X:
@@ -3365,6 +3376,11 @@ bool bpf_jit_supports_subprog_tailcalls(void)
return true;
}
+bool bpf_jit_supports_percpu_insn(void)
+{
+ return true;
+}
+
void bpf_jit_free(struct bpf_prog *prog)
{
if (prog->jited) {
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 44934b968b57..8bcfe6800400 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -178,6 +178,25 @@ struct ctl_table_header;
.off = 0, \
.imm = 0 })
+/* Special (internal-only) form of mov, used to resolve per-CPU addrs:
+ * dst_reg = src_reg + <percpu_base_off>
+ * BPF_ADDR_PERCPU is used as a special insn->off value.
+ */
+#define BPF_ADDR_PERCPU (-1)
+
+#define BPF_MOV64_PERCPU_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = BPF_ADDR_PERCPU, \
+ .imm = 0 })
+
+static inline bool insn_is_mov_percpu_addr(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->off == BPF_ADDR_PERCPU;
+}
+
/* Short form of mov, dst_reg = imm32 */
#define BPF_MOV64_IMM(DST, IMM) \
@@ -970,6 +989,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
void bpf_jit_compile(struct bpf_prog *prog);
bool bpf_jit_needs_zext(void);
bool bpf_jit_supports_subprog_tailcalls(void);
+bool bpf_jit_supports_percpu_insn(void);
bool bpf_jit_supports_kfunc_call(void);
bool bpf_jit_supports_far_kfunc_call(void);
bool bpf_jit_supports_exceptions(void);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ab400cdd7d7a..a996fd8dd303 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2945,6 +2945,11 @@ bool __weak bpf_jit_supports_subprog_tailcalls(void)
return false;
}
+bool __weak bpf_jit_supports_percpu_insn(void)
+{
+ return false;
+}
+
bool __weak bpf_jit_supports_kfunc_call(void)
{
return false;
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index bd2e2dd04740..309c4aa1b026 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -172,6 +172,17 @@ static bool is_addr_space_cast(const struct bpf_insn *insn)
insn->off == BPF_ADDR_SPACE_CAST;
}
+/* Special (internal-only) form of mov, used to resolve per-CPU addrs:
+ * dst_reg = src_reg + <percpu_base_off>
+ * BPF_ADDR_PERCPU is used as a special insn->off value.
+ */
+#define BPF_ADDR_PERCPU (-1)
+
+static inline bool is_mov_percpu_addr(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->off == BPF_ADDR_PERCPU;
+}
+
void print_bpf_insn(const struct bpf_insn_cbs *cbs,
const struct bpf_insn *insn,
bool allow_ptr_leaks)
@@ -194,6 +205,9 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
verbose(cbs->private_data, "(%02x) r%d = addr_space_cast(r%d, %d, %d)\n",
insn->code, insn->dst_reg,
insn->src_reg, ((u32)insn->imm) >> 16, (u16)insn->imm);
+ } else if (is_mov_percpu_addr(insn)) {
+ verbose(cbs->private_data, "(%02x) r%d = &(void __percpu *)(r%d)\n",
+ insn->code, insn->dst_reg, insn->src_reg);
} else if (BPF_SRC(insn->code) == BPF_X) {
verbose(cbs->private_data, "(%02x) %c%d %s %s%c%d\n",
insn->code, class == BPF_ALU ? 'w' : 'r',
--
2.43.0
next prev parent reply other threads:[~2024-04-02 2:13 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-02 2:13 [PATCH v2 bpf-next 0/4] Add internal-only BPF per-CPU instruction Andrii Nakryiko
2024-04-02 2:13 ` Andrii Nakryiko [this message]
2024-04-02 4:35 ` [PATCH v2 bpf-next 1/4] bpf: add special internal-only MOV instruction to resolve per-CPU addrs John Fastabend
2024-04-02 2:13 ` [PATCH v2 bpf-next 2/4] bpf: inline bpf_get_smp_processor_id() helper Andrii Nakryiko
2024-04-02 4:41 ` John Fastabend
2024-04-02 2:13 ` [PATCH v2 bpf-next 3/4] bpf: inline bpf_map_lookup_elem() for PERCPU_ARRAY maps Andrii Nakryiko
2024-04-02 5:02 ` John Fastabend
2024-04-02 16:20 ` Andrii Nakryiko
2024-04-02 2:13 ` [PATCH v2 bpf-next 4/4] bpf: inline bpf_map_lookup_elem() helper for PERCPU_HASH map Andrii Nakryiko
2024-04-02 5:04 ` John Fastabend
2024-04-02 4:57 ` [PATCH v2 bpf-next 0/4] Add internal-only BPF per-CPU instruction John Fastabend
2024-04-02 16:20 ` Andrii Nakryiko
2024-04-03 22:01 ` John Fastabend
2024-04-03 17:40 ` patchwork-bot+netdevbpf
2024-04-03 17:41 ` Alexei Starovoitov
2024-04-03 18:03 ` Andrii Nakryiko
2024-04-03 18:14 ` Alexei Starovoitov
2024-04-04 16:12 ` Andrii Nakryiko
2024-04-04 16:16 ` Puranjay Mohan
2024-04-04 16:18 ` Andrii Nakryiko
2024-04-05 12:48 ` Puranjay Mohan
2024-04-05 13:19 ` Pu Lehui
2024-04-04 21:02 ` Puranjay Mohan
2024-04-04 21:21 ` Andrii Nakryiko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240402021307.1012571-2-andrii@kernel.org \
--to=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=kernel-team@meta.com \
--cc=martin.lau@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox