From: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
To: bpf@vger.kernel.org
Cc: nkapron@google.com, teknoraver@meta.com,
roberto.sassu@huawei.com, gregkh@linuxfoundation.org,
paul@paul-moore.com, code@tyhicks.com,
flaniel@linux.microsoft.com
Subject: [PATCH 13/14] bpf: Apply in-kernel bpf instruction relocations
Date: Thu, 9 Jan 2025 13:43:55 -0800 [thread overview]
Message-ID: <20250109214617.485144-14-bboscaccy@linux.microsoft.com> (raw)
In-Reply-To: <20250109214617.485144-1-bboscaccy@linux.microsoft.com>
This code heavily borrows from libbpf, in particular,
bpf_object__relocate. CO-RE relocation facilities already exist in
the kernel.
All the previously collected relocations are applied and the immediate
of the instructions are re-written accordingly. Any values
corresponding to map offsets depend upon the user-supplied map array.
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
kernel/bpf/syscall.c | 489 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 489 insertions(+)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b766c790ae3f4..ea0401634e752 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -41,6 +41,8 @@
#include <net/netkit.h>
#include <net/tcx.h>
+#include "../tools/lib/bpf/relo_core.h"
+
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
@@ -7237,6 +7239,489 @@ static int resolve_externs(struct bpf_obj *obj)
return 0;
}
+static int relocate_core(struct bpf_obj *obj)
+{
+ const struct btf_ext_info_sec *sec;
+ const struct bpf_core_relo *rec;
+ const struct btf_ext_info *seg;
+ int i, insn_idx, sec_idx, sec_num;
+ struct bpf_prog_obj *prog;
+ struct bpf_insn *insn;
+ const char *sec_name;
+
+ struct bpf_core_ctx ctx = {
+ .log = NULL,
+ .btf = obj->btf,
+ };
+
+ seg = &obj->btf_ext->core_relo_info;
+ sec_num = 0;
+
+ for_each_btf_ext_sec(seg, sec) {
+ sec_idx = seg->sec_idxs[sec_num];
+ sec_num++;
+ sec_name = btf_str_by_offset(obj->btf, sec->sec_name_off);
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ insn_idx = rec->insn_off / sizeof(struct bpf_insn);
+ prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
+
+ if (!prog)
+ continue;
+
+ insn_idx = insn_idx - prog->sec_insn_off;
+ if (insn_idx >= prog->insn_cnt)
+ return -EINVAL;
+ insn = &prog->insn[insn_idx];
+
+ bpf_core_apply(&ctx, rec, i, insn);
+ }
+ }
+
+ return 0;
+}
+
+static int append_subprog_relos(struct bpf_prog_obj *main_prog, struct bpf_prog_obj *subprog)
+{
+ int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
+ struct bpf_reloc_desc *relos;
+ int i;
+
+ if (main_prog == subprog)
+ return 0;
+ relos = krealloc_array(main_prog->reloc_desc, new_cnt, sizeof(*relos), GFP_KERNEL);
+ /* if new count is zero, reallocarray can return a valid NULL result;
+ * in this case the previous pointer will be freed, so we *have to*
+ * reassign old pointer to the new value (even if it's NULL)
+ */
+ if (!relos && new_cnt)
+ return -ENOMEM;
+ if (subprog->nr_reloc)
+ memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
+ sizeof(*relos) * subprog->nr_reloc);
+
+ for (i = main_prog->nr_reloc; i < new_cnt; i++)
+ relos[i].insn_idx += subprog->sub_insn_off;
+ /* After insn_idx adjustment the 'relos' array is still sorted
+ * by insn_idx and doesn't break bsearch.
+ */
+ main_prog->reloc_desc = relos;
+ main_prog->nr_reloc = new_cnt;
+ return 0;
+}
+
+static int cmp_relo_by_insn_idx(const void *key, const void *elem)
+{
+ size_t insn_idx = *(const size_t *)key;
+ const struct bpf_reloc_desc *relo = elem;
+
+ if (insn_idx == relo->insn_idx)
+ return 0;
+ return insn_idx < relo->insn_idx ? -1 : 1;
+}
+
+static struct bpf_reloc_desc *find_prog_insn_relo(const struct bpf_prog_obj *prog, size_t insn_idx)
+{
+ if (!prog->nr_reloc)
+ return NULL;
+ return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
+ sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
+}
+
+static int append_subprog_code(struct bpf_obj *obj, struct bpf_prog_obj *main_prog,
+ struct bpf_prog_obj *subprog)
+{
+ struct bpf_insn *insns;
+ size_t new_cnt;
+ int err;
+
+ subprog->sub_insn_off = main_prog->insn_cnt;
+
+ new_cnt = main_prog->insn_cnt + subprog->insn_cnt;
+ insns = krealloc_array(main_prog->insn, new_cnt, sizeof(*insns), GFP_KERNEL);
+ if (!insns) {
+ pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
+ return -ENOMEM;
+ }
+
+ main_prog->insn = insns;
+ main_prog->insn_cnt = new_cnt;
+
+ memcpy(main_prog->insn + subprog->sub_insn_off, subprog->insn,
+ subprog->insn_cnt * sizeof(*insns));
+
+ /* The subprog insns are now appended. Append its relos too. */
+ err = append_subprog_relos(main_prog, subprog);
+ if (err)
+ return err;
+ return 0;
+}
+
+static int reloc_code(struct bpf_obj *obj, struct bpf_prog_obj *main_prog,
+ struct bpf_prog_obj *prog)
+{
+
+ size_t sub_idx, insn_idx;
+ struct bpf_prog_obj *subprog;
+ struct bpf_reloc_desc *relo;
+ struct bpf_insn *insn;
+ int err;
+
+ for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
+ insn = &main_prog->insn[prog->sub_insn_off + insn_idx];
+ if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
+ continue;
+
+ relo = find_prog_insn_relo(prog, insn_idx);
+ if (relo && relo->type == RELO_EXTERN_CALL)
+ /* kfunc relocations will be handled later
+ * in bpf_object__relocate_data()
+ */
+ continue;
+ if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
+ pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
+ prog->name, insn_idx, relo->type);
+ return -EOPNOTSUPP;
+ }
+ if (relo) {
+ /* sub-program instruction index is a combination of
+ * an offset of a symbol pointed to by relocation and
+ * call instruction's imm field; for global functions,
+ * call always has imm = -1, but for static functions
+ * relocation is against STT_SECTION and insn->imm
+ * points to a start of a static function
+ *
+ * for subprog addr relocation, the relo->sym_off + insn->imm is
+ * the byte offset in the corresponding section.
+ */
+ if (relo->type == RELO_CALL)
+ sub_idx = relo->sym_off / sizeof(struct bpf_insn) + insn->imm + 1;
+ else
+ sub_idx = (relo->sym_off + insn->imm) / sizeof(struct bpf_insn);
+ } else if (insn_is_pseudo_func(insn)) {
+ /*
+ * RELO_SUBPROG_ADDR relo is always emitted even if both
+ * functions are in the same section, so it shouldn't reach here.
+ */
+ pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
+ prog->name, insn_idx);
+ return -EOPNOTSUPP;
+ } else {
+ /* if subprogram call is to a static function within
+ * the same ELF section, there won't be any relocation
+ * emitted, but it also means there is no additional
+ * offset necessary, insns->imm is relative to
+ * instruction's original position within the section
+ */
+ sub_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
+ }
+
+ /* we enforce that sub-programs should be in .text section */
+ subprog = find_prog_by_sec_insn(obj, obj->index.text, sub_idx);
+ if (!subprog) {
+ pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
+ prog->name);
+ return -EOPNOTSUPP;
+ }
+
+ /* if it's the first call instruction calling into this
+ * subprogram (meaning this subprog hasn't been processed
+ * yet) within the context of current main program:
+ * - append it at the end of main program's instructions blog;
+ * - process is recursively, while current program is put on hold;
+ * - if that subprogram calls some other not yet processes
+ * subprogram, same thing will happen recursively until
+ * there are no more unprocesses subprograms left to append
+ * and relocate.
+ */
+ if (subprog->sub_insn_off == 0) {
+ err = append_subprog_code(obj, main_prog, subprog);
+ if (err)
+ return err;
+ err = reloc_code(obj, main_prog, subprog);
+ if (err)
+ return err;
+ }
+
+ /* main_prog->insns memory could have been re-allocated, so
+ * calculate pointer again
+ */
+ insn = &main_prog->insn[prog->sub_insn_off + insn_idx];
+ /* calculate correct instruction position within current main
+ * prog; each main prog can have a different set of
+ * subprograms appended (potentially in different order as
+ * well), so position of any subprog can be different for
+ * different main programs
+ */
+ insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
+
+ pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
+ prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
+ }
+
+ return 0;
+}
+
+static bool prog_is_subprog(const struct bpf_obj *obj, const struct bpf_prog_obj *prog)
+{
+ return prog->sec_idx == obj->index.text && obj->nr_programs > 1;
+}
+
+static int relocate_calls(struct bpf_obj *obj, struct bpf_prog_obj *prog)
+{
+ struct bpf_prog_obj *subprog;
+ int i, err;
+
+ /* mark all subprogs as not relocated (yet) within the context of
+ * current main program
+ */
+ for (i = 0; i < obj->nr_programs; i++) {
+ subprog = &obj->progs[i];
+ if (!prog_is_subprog(obj, subprog))
+ continue;
+
+ subprog->sub_insn_off = 0;
+ }
+
+ err = reloc_code(obj, prog, prog);
+ if (err)
+ return err;
+ return 0;
+
+}
+
+/* unresolved kfunc call special constant, used also for log fixup logic */
+#define POISON_CALL_KFUNC_BASE 2002000000
+#define POISON_CALL_KFUNC_PFX "2002"
+
+static void poison_kfunc_call(struct bpf_prog_obj *prog, int relo_idx,
+ int insn_idx, struct bpf_insn *insn,
+ int ext_idx, const struct bpf_extern_desc *ext)
+{
+ pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n",
+ prog->name, relo_idx, insn_idx, ext->name);
+
+ /* we turn kfunc call into invalid helper call with identifiable constant */
+ insn->code = BPF_JMP | BPF_CALL;
+ insn->dst_reg = 0;
+ insn->src_reg = 0;
+ insn->off = 0;
+ /* if this instruction is reachable (not a dead code),
+ * verifier will complain with something like:
+ * invalid func unknown#2001000123
+ * where lower 123 is extern index into obj->externs[] array
+ */
+ insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
+}
+
+static int relocate_data(struct bpf_obj *obj, struct bpf_prog_obj *prog)
+{
+ int i;
+
+ for (i = 0; i < prog->nr_reloc; i++) {
+ struct bpf_reloc_desc *relo = &prog->reloc_desc[i];
+ struct bpf_insn *insn = &prog->insn[relo->insn_idx];
+ const struct bpf_map_obj *map;
+ struct bpf_extern_desc *ext;
+
+ switch (relo->type) {
+ case RELO_LD64:
+ map = &obj->maps[relo->map_idx];
+ insn[0].src_reg = BPF_PSEUDO_MAP_FD;
+ insn[0].imm = map->fd;
+ break;
+ case RELO_DATA:
+ map = &obj->maps[relo->map_idx];
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = map->fd;
+ break;
+ case RELO_EXTERN_LD64:
+ ext = &obj->externs[relo->ext_idx];
+ if (ext->type == EXT_KCFG) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+ insn[1].imm = ext->kcfg.data_off;
+ } else /* EXT_KSYM */ {
+ if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
+ insn[0].src_reg = BPF_PSEUDO_BTF_ID;
+ insn[0].imm = ext->ksym.kernel_btf_id;
+ insn[1].imm = ext->ksym.kernel_btf_obj_fd;
+ } else { /* typeless ksyms or unresolved typed ksyms */
+ insn[0].imm = (__u32)ext->ksym.addr;
+ insn[1].imm = ext->ksym.addr >> 32;
+ }
+ }
+ break;
+ case RELO_EXTERN_CALL:
+ ext = &obj->externs[relo->ext_idx];
+ insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
+ if (ext->is_set) {
+ insn[0].imm = ext->ksym.kernel_btf_id;
+ insn[0].off = ext->ksym.btf_fd_idx;
+ } else { /* unresolved weak kfunc call */
+ poison_kfunc_call(prog, i, relo->insn_idx, insn,
+ relo->ext_idx, ext);
+ }
+ break;
+ case RELO_SUBPROG_ADDR:
+ case RELO_CALL:
+ case RELO_CORE:
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int prog_assign_exc_cb(struct bpf_obj *obj, struct bpf_prog_obj *prog)
+{
+ const char *str = "exception_callback:";
+ size_t pfx_len = strlen(str);
+ int i, j, n;
+ const char *name;
+ const struct btf_type *t;
+
+ if (!obj->btf)
+ return 0;
+
+ n = btf_type_cnt(obj->btf);
+ for (i = 1; i < n; i++) {
+ t = btf_type_by_id(obj->btf, i);
+ if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1)
+ continue;
+
+ name = btf_str_by_offset(obj->btf, t->name_off);
+ if (strncmp(name, str, pfx_len) != 0)
+ continue;
+
+ t = btf_type_by_id(obj->btf, t->type);
+ if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
+ pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n",
+ prog->name);
+ return -EINVAL;
+ }
+ if (strcmp(prog->name, btf_str_by_offset(obj->btf, t->name_off)) != 0)
+ continue;
+ /* Multiple callbacks are specified for the same prog,
+ * the verifier will eventually return an error for this
+ * case, hence simply skip appending a subprog.
+ */
+ if (prog->exception_cb_idx >= 0) {
+ prog->exception_cb_idx = -1;
+ break;
+ }
+
+ name += pfx_len;
+ if (str_is_empty(name)) {
+ pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n",
+ prog->name);
+ return -EINVAL;
+ }
+
+ for (j = 0; j < obj->nr_programs; j++) {
+ struct bpf_prog_obj *subprog = &obj->progs[j];
+
+ if (!prog_is_subprog(obj, subprog))
+ continue;
+ if (strcmp(name, subprog->name) != 0)
+ continue;
+ /* Let's see if we already saw a static exception callback with this name */
+ if (prog->exception_cb_idx >= 0) {
+ pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n",
+ prog->name, subprog->name);
+ return -EINVAL;
+ }
+ prog->exception_cb_idx = j;
+ break;
+ }
+
+ if (prog->exception_cb_idx >= 0)
+ continue;
+
+ pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name);
+ return -ENOENT;
+ }
+
+ return 0;
+
+}
+
+static int relocate_object(struct bpf_obj *obj)
+{
+ struct bpf_prog_obj *prog;
+ int i, j, err;
+
+ if (obj->btf)
+ relocate_core(obj);
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->progs[i];
+ for (j = 0; j < prog->nr_reloc; j++) {
+ struct bpf_reloc_desc *relo = &prog->reloc_desc[j];
+ struct bpf_insn *insn = &prog->insn[relo->insn_idx];
+
+ /* mark the insn, so it's recognized by insn_is_pseudo_func() */
+ if (relo->type == RELO_SUBPROG_ADDR)
+ insn[0].src_reg = BPF_PSEUDO_FUNC;
+ }
+ }
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->progs[i];
+ /* sub-program's sub-calls are relocated within the context of
+ * its main program only
+ */
+ if (prog_is_subprog(obj, prog))
+ continue;
+
+ err = relocate_calls(obj, prog);
+ if (err) {
+ pr_warn("prog '%s': failed to relocate calls: %d\n",
+ prog->name, err);
+ return err;
+ }
+
+ err = prog_assign_exc_cb(obj, prog);
+ if (err)
+ return err;
+
+ /* Now, also append exception callback if it has not been done already. */
+ if (prog->exception_cb_idx >= 0) {
+ struct bpf_prog_obj *subprog = &obj->progs[prog->exception_cb_idx];
+
+ /* Calling exception callback directly is disallowed, which the
+ * verifier will reject later. In case it was processed already,
+ * we can skip this step, otherwise for all other valid cases we
+ * have to append exception callback now.
+ */
+ if (subprog->sub_insn_off == 0) {
+ err = append_subprog_code(obj, prog, subprog);
+ if (err)
+ return err;
+ err = reloc_code(obj, prog, subprog);
+ if (err)
+ return err;
+ }
+ }
+ }
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->progs[i];
+ if (prog_is_subprog(obj, prog))
+ continue;
+
+ /* Process data relos for main programs */
+ err = relocate_data(obj, prog);
+ if (err) {
+ pr_warn("prog '%s': failed to relocate data references: %d\n",
+ prog->name, err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static void free_bpf_obj(struct bpf_obj *obj)
{
int i;
@@ -7488,6 +7973,10 @@ static int load_fd(union bpf_attr *attr)
if (err < 0)
goto free;
+ err = relocate_object(obj);
+ if (err < 0)
+ goto free;
+
return obj_f;
free:
free_bpf_obj(obj);
--
2.47.1
next prev parent reply other threads:[~2025-01-09 21:48 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 01/14] bpf: Port prerequiste BTF handling functions from userspace Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 02/14] bpf: Add data structures for managing in-kernel eBPF relocations Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 03/14] bpf: Port .btf.ext parsing functions from userspace Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 04/14] bpf: Port elf and btf utility helper " Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 05/14] fs/kernel_read_file: Add an eBPF specifier to kernel_read_file Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 06/14] bpf: Add BPF_LOAD_FD subcommand Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 07/14] bpf: Implement BPF_LOAD_FD subcommand handler Blaise Boscaccy
2025-01-10 6:05 ` Greg KH
2025-01-10 22:41 ` Blaise Boscaccy
2025-01-11 0:41 ` kernel test robot
2025-01-09 21:43 ` [PATCH 08/14] bpf: Add elf parsing support to the BPF_LOAD_FD subcommand Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 09/14] bpf: Collect extern relocations Blaise Boscaccy
2025-01-11 1:35 ` kernel test robot
2025-01-09 21:43 ` [PATCH 10/14] bpf: Implement BTF fixup functionality Blaise Boscaccy
2025-01-11 3:19 ` kernel test robot
2025-01-09 21:43 ` [PATCH 11/14] bpf: Implement relocation collection Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 12/14] bpf: Resolve external relocations Blaise Boscaccy
2025-01-09 21:43 ` Blaise Boscaccy [this message]
2025-01-09 21:43 ` [PATCH 14/14] bpf: Augment BPF_PROG_LOAD to use in-kernel relocations Blaise Boscaccy
2025-01-10 18:40 ` [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Alexei Starovoitov
2025-01-10 23:27 ` Blaise Boscaccy
2025-01-13 17:54 ` Alexei Starovoitov
2025-01-14 18:24 ` Blaise Boscaccy
2025-01-24 5:08 ` bpf signing. " Alexei Starovoitov
2025-01-24 7:05 ` John Fastabend
2025-01-28 22:32 ` Blaise Boscaccy
2025-01-30 1:13 ` Cong Wang
2025-01-30 19:22 ` Blaise Boscaccy
2025-02-01 22:24 ` Cong Wang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250109214617.485144-14-bboscaccy@linux.microsoft.com \
--to=bboscaccy@linux.microsoft.com \
--cc=bpf@vger.kernel.org \
--cc=code@tyhicks.com \
--cc=flaniel@linux.microsoft.com \
--cc=gregkh@linuxfoundation.org \
--cc=nkapron@google.com \
--cc=paul@paul-moore.com \
--cc=roberto.sassu@huawei.com \
--cc=teknoraver@meta.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).