* [PATCH 01/14] bpf: Port prerequisite BTF handling functions from userspace
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
@ 2025-01-09 21:43 ` Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 02/14] bpf: Add data structures for managing in-kernel eBPF relocations Blaise Boscaccy
` (14 subsequent siblings)
15 siblings, 0 replies; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-09 21:43 UTC (permalink / raw)
To: bpf; +Cc: nkapron, teknoraver, roberto.sassu, gregkh, paul, code, flaniel
The kernel and userspace btf libraries unfortunately differ in
functionality, API, and scope. There are many functions missing from
the kernel implementation that are used in logic for calculating
instruction relocation metadata for bpf instructions. Here we port
over functions directly from the userspace implementation, as-is, that
are used in these calculations.
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
include/linux/btf.h | 68 ++++++++++-
kernel/bpf/btf.c | 272 ++++++++++++++++++++++++++++++++++++++++----
2 files changed, 317 insertions(+), 23 deletions(-)
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 4214e76c91686..0c6a4ef47a581 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -258,6 +258,11 @@ static inline bool btf_type_is_int(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_INT;
}
+static inline u8 btf_type_int_bits(const struct btf_type *t)
+{
+ return BTF_INT_BITS(*(__u32 *)(t + 1));
+}
+
static inline bool btf_type_is_small_int(const struct btf_type *t)
{
return btf_type_is_int(t) && t->size <= sizeof(u64);
@@ -278,6 +283,21 @@ static inline bool btf_type_is_enum(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM;
}
+static inline bool btf_is_typedef(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF;
+}
+
+static inline bool btf_is_mod(const struct btf_type *t)
+{
+ u16 kind = BTF_INFO_KIND(t->info);
+
+ return kind == BTF_KIND_VOLATILE ||
+ kind == BTF_KIND_CONST ||
+ kind == BTF_KIND_RESTRICT ||
+ kind == BTF_KIND_TYPE_TAG;
+}
+
static inline bool btf_is_any_enum(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM ||
@@ -353,6 +373,16 @@ static inline bool btf_type_is_scalar(const struct btf_type *t)
return btf_type_is_int(t) || btf_type_is_enum(t);
}
+static inline bool btf_type_is_mod(const struct btf_type *t)
+{
+ u16 kind = btf_kind(t);
+
+ return kind == BTF_KIND_VOLATILE ||
+ kind == BTF_KIND_CONST ||
+ kind == BTF_KIND_RESTRICT ||
+ kind == BTF_KIND_TYPE_TAG;
+}
+
static inline bool btf_type_is_typedef(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF;
@@ -383,6 +413,21 @@ static inline bool btf_type_is_type_tag(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG;
}
+static inline bool btf_type_is_datasec(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
+}
+
+static inline bool btf_is_decl_tag(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG;
+}
+
+static inline bool btf_is_func(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC;
+}
+
/* union is only a special case of struct:
* all its offsetof(member) == 0
*/
@@ -482,14 +527,19 @@ static inline const struct btf_var_secinfo *btf_type_var_secinfo(
return (const struct btf_var_secinfo *)(t + 1);
}
-static inline struct btf_param *btf_params(const struct btf_type *t)
+static inline struct btf_decl_tag *btf_decl_tag(const struct btf_type *t)
{
- return (struct btf_param *)(t + 1);
+ return (struct btf_decl_tag *)(t + 1);
}
-static inline struct btf_decl_tag *btf_decl_tag(const struct btf_type *t)
+static inline struct btf_var *btf_var(const struct btf_type *t)
{
- return (struct btf_decl_tag *)(t + 1);
+ return (struct btf_var *)(t + 1);
+}
+
+static inline struct btf_param *btf_params(const struct btf_type *t)
+{
+ return (struct btf_param *)(t + 1);
}
static inline int btf_id_cmp_func(const void *a, const void *b)
@@ -517,6 +567,16 @@ int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto,
struct bpf_verifier_log;
+struct btf *btf_init_mem(void *btf_data,
+ u32 size,
+ u64 btf_log_buf,
+ u32 btf_log_level,
+ u32 btf_log_size);
+int btf_parse_mem(struct btf *btf);
+const char *btf_str_by_offset(const struct btf *btf, u32 offset);
+u32 btf_type_cnt(const struct btf *btf);
+int btf_align_of(const struct btf *btf, u32 id);
+int btf_add_var(struct btf *btf, int name_off, int linkage, int type_id);
#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
struct bpf_struct_ops;
int __register_bpf_struct_ops(struct bpf_struct_ops *st_ops);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index e7a59e6462a93..02d300b8de0bc 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -503,11 +503,6 @@ static bool btf_type_is_fwd(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
}
-static bool btf_type_is_datasec(const struct btf_type *t)
-{
- return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
-}
-
static bool btf_type_is_decl_tag(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG;
@@ -1585,10 +1580,8 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env,
__btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size);
}
-static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
+static int btf_add_type(struct btf *btf, struct btf_verifier_env *env, struct btf_type *t)
{
- struct btf *btf = env->btf;
-
if (btf->types_size == btf->nr_types) {
/* Expand 'types' array */
@@ -1630,6 +1623,23 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
return 0;
}
+int btf_add_var(struct btf *btf, int name_off, int linkage, int type_id)
+{
+ struct btf_var *v;
+ struct btf_type *t = kmalloc(sizeof(struct btf_type) + sizeof(struct btf_var), GFP_KERNEL);
+
+ if (!t)
+ return -ENOMEM;
+
+ t->name_off = name_off;
+ t->info = BTF_KIND_VAR;
+ t->type = type_id;
+ v = btf_var(t);
+ v->linkage = linkage;
+
+ return btf_add_type(btf, NULL, t);
+}
+
static int btf_alloc_id(struct btf *btf)
{
int id;
@@ -1965,6 +1975,7 @@ __btf_resolve_size(const struct btf *btf, const struct btf_type *type,
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
case BTF_KIND_TYPE_TAG:
+ case BTF_KIND_VAR:
id = type->type;
type = btf_type_by_id(btf, type->type);
break;
@@ -1978,7 +1989,6 @@ __btf_resolve_size(const struct btf *btf, const struct btf_type *type,
nelems *= array->nelems;
type = btf_type_by_id(btf, array->type);
break;
-
/* type without size */
default:
return ERR_PTR(-EINVAL);
@@ -4667,13 +4677,6 @@ static s32 btf_var_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- var = btf_type_var(t);
- if (var->linkage != BTF_VAR_STATIC &&
- var->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
- btf_verifier_log_type(env, t, "Linkage not supported");
- return -EINVAL;
- }
-
btf_verifier_log_type(env, t, NULL);
return meta_needed;
@@ -5232,7 +5235,7 @@ static int btf_check_all_metas(struct btf_verifier_env *env)
if (meta_size < 0)
return meta_size;
- btf_add_type(env, t);
+ btf_add_type(btf, env, t);
cur += meta_size;
env->log_type_id++;
}
@@ -5348,6 +5351,71 @@ static int btf_check_all_types(struct btf_verifier_env *env)
return 0;
}
+static int btf_type_size(const struct btf_type *t)
+{
+ const int base_size = sizeof(struct btf_type);
+ __u16 vlen = btf_vlen(t);
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_FWD:
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_TYPE_TAG:
+ return base_size;
+ case BTF_KIND_INT:
+ return base_size + sizeof(__u32);
+ case BTF_KIND_ENUM:
+ return base_size + vlen * sizeof(struct btf_enum);
+ case BTF_KIND_ENUM64:
+ return base_size + vlen * sizeof(struct btf_enum64);
+ case BTF_KIND_ARRAY:
+ return base_size + sizeof(struct btf_array);
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ return base_size + vlen * sizeof(struct btf_member);
+ case BTF_KIND_FUNC_PROTO:
+ return base_size + vlen * sizeof(struct btf_param);
+ case BTF_KIND_VAR:
+ return base_size + sizeof(struct btf_var);
+ case BTF_KIND_DATASEC:
+ return base_size + vlen * sizeof(struct btf_var_secinfo);
+ case BTF_KIND_DECL_TAG:
+ return base_size + sizeof(struct btf_decl_tag);
+ default:
+ pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
+ return -EINVAL;
+ }
+}
+
+static int btf_parse_type_sec_loose(struct btf_verifier_env *env)
+{
+ struct btf *btf = env->btf;
+ struct btf_header *hdr;
+ void *cur, *end;
+
+ hdr = &btf->hdr;
+ cur = btf->nohdr_data + hdr->type_off;
+ end = cur + hdr->type_len;
+
+ env->log_type_id = btf->base_btf ? btf->start_id : 1;
+ while (cur < end) {
+ struct btf_type *t = cur;
+ s32 meta_size;
+
+ meta_size = btf_type_size(t);
+ btf_add_type(btf, env, t);
+ cur += meta_size;
+ env->log_type_id++;
+ }
+
+ return 0;
+}
+
static int btf_parse_type_sec(struct btf_verifier_env *env)
{
const struct btf_header *hdr = &env->btf->hdr;
@@ -5367,7 +5435,6 @@ static int btf_parse_type_sec(struct btf_verifier_env *env)
err = btf_check_all_metas(env);
if (err)
return err;
-
return btf_check_all_types(env);
}
@@ -5736,6 +5803,173 @@ static int finalize_log(struct bpf_verifier_log *log, bpfptr_t uattr, u32 uattr_
return err;
}
+u32 btf_type_cnt(const struct btf *btf)
+{
+ return btf->start_id + btf->nr_types;
+}
+
+static u32 determine_ptr_size(const struct btf *btf)
+{
+ static const char * const long_aliases[] = {
+ "long",
+ "long int",
+ "int long",
+ "unsigned long",
+ "long unsigned",
+ "unsigned long int",
+ "unsigned int long",
+ "long unsigned int",
+ "long int unsigned",
+ "int unsigned long",
+ "int long unsigned",
+ };
+ const struct btf_type *t;
+ const char *name;
+ int i, j, n;
+
+ n = btf_type_cnt(btf);
+ for (i = 1; i < n; i++) {
+ t = btf_type_by_id(btf, i);
+ if (!btf_type_is_int(t))
+ continue;
+
+ if (t->size != 4 && t->size != 8)
+ continue;
+
+ name = btf_str_by_offset(btf, t->name_off);
+ if (!name)
+ continue;
+
+ for (j = 0; j < ARRAY_SIZE(long_aliases); j++) {
+ if (strcmp(name, long_aliases[j]) == 0)
+ return t->size;
+ }
+ }
+
+ return -1;
+}
+
+int btf_align_of(const struct btf *btf, u32 id)
+{
+ const struct btf_type *t = btf_type_by_id(btf, id);
+ __u16 kind = btf_kind(t);
+
+ switch (kind) {
+ case BTF_KIND_INT:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ case BTF_KIND_FLOAT:
+ return min(determine_ptr_size(btf), (size_t)t->size);
+ case BTF_KIND_PTR:
+ return determine_ptr_size(btf);
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_TYPE_TAG:
+ return btf_align_of(btf, t->type);
+ case BTF_KIND_ARRAY:
+ return btf_align_of(btf, btf_array(t)->type);
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m = btf_members(t);
+ __u16 vlen = btf_vlen(t);
+ int i, max_align = 1, align;
+
+ for (i = 0; i < vlen; i++, m++) {
+ align = btf_align_of(btf, m->type);
+ if (align <= 0)
+ return -EINVAL;
+ max_align = max(max_align, align);
+
+ /* if field offset isn't aligned according to field
+ * type's alignment, then struct must be packed
+ */
+ if (btf_member_bitfield_size(t, i) == 0 &&
+ (m->offset % (8 * align)) != 0)
+ return 1;
+ }
+
+ /* if struct/union size isn't a multiple of its alignment,
+ * then struct must be packed
+ */
+ if ((t->size % max_align) != 0)
+ return 1;
+
+ return max_align;
+ }
+ default:
+ pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
+ return -EINVAL;
+ }
+}
+
+struct btf *btf_init_mem(void *btf_data,
+ u32 size,
+ u64 btf_log_buf,
+ u32 btf_log_level,
+ u32 btf_log_size)
+{
+ struct btf_verifier_env *env = NULL;
+ char __user *log_ubuf = u64_to_user_ptr(btf_log_buf);
+ struct btf *btf = NULL;
+ u8 *data;
+ int err;
+
+ env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
+ if (!env)
+ return ERR_PTR(-ENOMEM);
+
+ err = bpf_vlog_init(&env->log, btf_log_level,
+ log_ubuf, btf_log_size);
+ if (err)
+ goto errout_free;
+
+ btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
+ if (!btf) {
+ err = -ENOMEM;
+ goto errout;
+ }
+ env->btf = btf;
+
+ data = kvmalloc(size, GFP_KERNEL | __GFP_NOWARN);
+ if (!data) {
+ err = -ENOMEM;
+ goto errout;
+ }
+
+ btf->data = data;
+ btf->data_size = size;
+
+ memcpy(btf->data, btf_data, size);
+
+ err = btf_parse_hdr(env);
+ if (err)
+ goto errout;
+
+ btf->nohdr_data = btf->data + btf->hdr.hdr_len;
+
+ err = btf_parse_str_sec(env);
+ if (err)
+ goto errout;
+
+
+ err = btf_parse_type_sec_loose(env);
+ if (err)
+ goto errout;
+
+ btf_verifier_env_free(env);
+ refcount_set(&btf->refcnt, 1);
+ return btf;
+
+errout:
+errout_free:
+ btf_verifier_env_free(env);
+ if (btf)
+ btf_free(btf);
+ return ERR_PTR(err);
+}
+
static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
{
bpfptr_t btf_data = make_bpfptr(attr->btf, uattr.is_kernel);
@@ -9045,7 +9279,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
if (need_cands) {
kfree(cands.cands);
mutex_unlock(&cand_cache_mutex);
- if (ctx->log->level & BPF_LOG_LEVEL2)
+ if (ctx->log && ctx->log->level & BPF_LOG_LEVEL2)
print_cand_cache(ctx->log);
}
return err;
--
2.47.1
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 02/14] bpf: Add data structures for managing in-kernel eBPF relocations
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 01/14] bpf: Port prerequisite BTF handling functions from userspace Blaise Boscaccy
@ 2025-01-09 21:43 ` Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 03/14] bpf: Port .btf.ext parsing functions from userspace Blaise Boscaccy
` (13 subsequent siblings)
15 siblings, 0 replies; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-09 21:43 UTC (permalink / raw)
To: bpf; +Cc: nkapron, teknoraver, roberto.sassu, gregkh, paul, code, flaniel
Here we introduce several new structs used in the management of ebpf
instruction relocation metadata. These structs are heavily inspired by
existing definitions from libbpf, albeit a bit stripped down for
kernel use, along with some semantic changes due to differing elf
abstractions between userspace and kernelspace.
Additionally we introduce several struct definitions and macros for
the handling of .btf.ext sections which are utilized by libbpf but
missing from the kernel.
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
include/linux/bpf.h | 257 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 257 insertions(+)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3ace0d6227e3c..0859e71e2641c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1631,6 +1631,263 @@ struct bpf_prog {
};
};
+struct btf_ext_info {
+ /*
+ * info points to the individual info section (e.g. func_info and
+ * line_info) from the .BTF.ext. It does not include the __u32 rec_size.
+ */
+ void *info;
+ __u32 rec_size;
+ __u32 len;
+ /* optional (maintained internally by libbpf) mapping between .BTF.ext
+ * section and corresponding ELF section. This is used to join
+ * information like CO-RE relocation records with corresponding BPF
+ * programs defined in ELF sections
+ */
+ __u32 *sec_idxs;
+ int sec_cnt;
+};
+
+#define for_each_btf_ext_sec(seg, sec) \
+ for (sec = (seg)->info; \
+ (void *)sec < (seg)->info + (seg)->len; \
+ sec = (void *)sec + sizeof(struct btf_ext_info_sec) + \
+ (seg)->rec_size * sec->num_info)
+
+#define for_each_btf_ext_rec(seg, sec, i, rec) \
+ for (i = 0, rec = (void *)&(sec)->data; \
+ i < (sec)->num_info; \
+ i++, rec = (void *)rec + (seg)->rec_size)
+
+/*
+ * The .BTF.ext ELF section layout defined as
+ * struct btf_ext_header
+ * func_info subsection
+ *
+ * The func_info subsection layout:
+ * record size for struct bpf_func_info in the func_info subsection
+ * struct btf_sec_func_info for section #1
+ * a list of bpf_func_info records for section #1
+ * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h
+ * but may not be identical
+ * struct btf_sec_func_info for section #2
+ * a list of bpf_func_info records for section #2
+ * ......
+ *
+ * Note that the bpf_func_info record size in .BTF.ext may not
+ * be the same as the one defined in include/uapi/linux/bpf.h.
+ * The loader should ensure that record_size meets minimum
+ * requirement and pass the record as is to the kernel. The
+ * kernel will handle the func_info properly based on its contents.
+ */
+struct btf_ext_header {
+ __u16 magic;
+ __u8 version;
+ __u8 flags;
+ __u32 hdr_len;
+
+ /* All offsets are in bytes relative to the end of this header */
+ __u32 func_info_off;
+ __u32 func_info_len;
+ __u32 line_info_off;
+ __u32 line_info_len;
+
+ /* optional part of .BTF.ext header */
+ __u32 core_relo_off;
+ __u32 core_relo_len;
+};
+
+struct btf_ext {
+ union {
+ struct btf_ext_header *hdr;
+ void *data;
+ };
+ struct btf_ext_info func_info;
+ struct btf_ext_info line_info;
+ struct btf_ext_info core_relo_info;
+ __u32 data_size;
+};
+
+struct btf_ext_info_sec {
+ __u32 sec_name_off;
+ __u32 num_info;
+ /* Followed by num_info * record_size number of bytes */
+ __u8 data[];
+};
+
+
+enum bpf_reloc_type {
+ RELO_LD64,
+ RELO_CALL,
+ RELO_DATA,
+ RELO_EXTERN_LD64,
+ RELO_EXTERN_CALL,
+ RELO_SUBPROG_ADDR,
+ RELO_CORE,
+};
+
+struct bpf_reloc_desc {
+ enum bpf_reloc_type type;
+ int insn_idx;
+ union {
+ const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
+ struct {
+ int map_idx;
+ int sym_off;
+ int ext_idx;
+ };
+ };
+};
+
+enum bpf_extern_type {
+ EXT_UNKNOWN,
+ EXT_KCFG,
+ EXT_KSYM,
+};
+
+enum bpf_kcfg_type {
+ KCFG_UNKNOWN,
+ KCFG_CHAR,
+ KCFG_BOOL,
+ KCFG_INT,
+ KCFG_TRISTATE,
+ KCFG_CHAR_ARR,
+};
+
+struct bpf_extern_desc {
+ enum bpf_extern_type type;
+ int sym_idx;
+ int btf_id;
+ int sec_btf_id;
+ const char *name;
+ char *essent_name;
+ bool is_set;
+ bool is_weak;
+ union {
+ struct {
+ enum bpf_kcfg_type type;
+ int sz;
+ int align;
+ int data_off;
+ bool is_signed;
+ } kcfg;
+ struct {
+ unsigned long long addr;
+
+ /* target btf_id of the corresponding kernel var. */
+ int kernel_btf_obj_fd;
+ int kernel_btf_id;
+
+ /* local btf_id of the ksym extern's type. */
+ __u32 type_id;
+ /* BTF fd index to be patched in for insn->off, this is
+ * 0 for vmlinux BTF, index in obj->fd_array for module
+ * BTF
+ */
+ __s16 btf_fd_idx;
+ } ksym;
+ };
+};
+
+
+struct bpf_prog_obj {
+ char *name;
+
+ struct bpf_insn *insn;
+ unsigned int insn_cnt;
+
+ size_t sec_idx;
+ size_t sec_insn_off;
+ size_t sec_insn_cnt;
+ size_t sub_insn_off;
+
+ struct bpf_reloc_desc *reloc_desc;
+ int nr_reloc;
+
+ int exception_cb_idx;
+
+};
+
+struct bpf_st_ops {
+ const char *tname;
+ const struct btf_type *type;
+ struct bpf_program **progs;
+ __u32 *kern_func_off;
+ /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
+ void *data;
+ /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
+ * btf_vmlinux's format.
+ * struct bpf_struct_ops_tcp_congestion_ops {
+ * [... some other kernel fields ...]
+ * struct tcp_congestion_ops data;
+ * }
+ * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
+ * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
+ * from "data".
+ */
+ void *kern_vdata;
+ __u32 type_id;
+};
+
+enum libbpf_map_type {
+ LIBBPF_MAP_UNSPEC,
+ LIBBPF_MAP_DATA,
+ LIBBPF_MAP_BSS,
+ LIBBPF_MAP_RODATA,
+ LIBBPF_MAP_KCONFIG,
+};
+
+struct bpf_map_obj {
+ u32 map_type;
+ u32 fd;
+ u32 sec_idx;
+ u32 sec_offset;
+};
+
+struct bpf_module_obj {
+ u32 id;
+ u32 fd;
+ u32 fd_array_idx;
+};
+
+struct bpf_module_btf {
+ struct btf *btf;
+ u32 id;
+ int fd;
+ int fd_array_idx;
+};
+
+struct bpf_obj {
+ u32 nr_programs;
+ Elf_Ehdr *hdr;
+ unsigned long len;
+ Elf_Shdr *sechdrs;
+ char *secstrings, *strtab;
+
+ struct {
+ unsigned int sym, str, btf, btf_ext, text, arena;
+ } index;
+
+ struct bpf_prog_obj *progs;
+
+ struct btf *btf;
+ struct btf_ext *btf_ext;
+
+ struct bpf_extern_desc *externs;
+ int nr_extern;
+
+ struct bpf_map_obj *maps;
+ int nr_maps;
+
+ int arena_map_idx;
+ int kconfig_map_idx;
+
+ struct btf *btf_vmlinux;
+
+ struct bpf_module_btf *btf_modules;
+ int btf_modules_cnt;
+};
+
struct bpf_array_aux {
/* Programs with direct jumps into programs part of this array. */
struct list_head poke_progs;
--
2.47.1
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 03/14] bpf: Port .btf.ext parsing functions from userspace
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 01/14] bpf: Port prerequisite BTF handling functions from userspace Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 02/14] bpf: Add data structures for managing in-kernel eBPF relocations Blaise Boscaccy
@ 2025-01-09 21:43 ` Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 04/14] bpf: Port elf and btf utility helper " Blaise Boscaccy
` (12 subsequent siblings)
15 siblings, 0 replies; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-09 21:43 UTC (permalink / raw)
To: bpf; +Cc: nkapron, teknoraver, roberto.sassu, gregkh, paul, code, flaniel
Functionality to parse .btf.ext sections of elf files is currently
missing from the kernel. This code simply copies some needed functions
from tools/lib/bpf/btf.c to aid in porting to minimize changes to code
ported over from libbpf.
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
kernel/bpf/syscall.c | 247 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 247 insertions(+)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 58190ca724a26..907cc0b34f822 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2018 Facebook
*/
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
@@ -5717,6 +5718,252 @@ static int token_create(union bpf_attr *attr)
return bpf_token_create(attr);
}
+struct btf_ext_sec_setup_param {
+ __u32 off;
+ __u32 len;
+ __u32 min_rec_size;
+ struct btf_ext_info *ext_info;
+ const char *desc;
+};
+
+struct bpf_func_info_min {
+ __u32 insn_off;
+ __u32 type_id;
+};
+
+/* The minimum bpf_line_info checked by the loader */
+struct bpf_line_info_min {
+ __u32 insn_off;
+ __u32 file_name_off;
+ __u32 line_off;
+ __u32 line_col;
+};
+
+static int btf_ext_setup_info(struct btf_ext *btf_ext,
+ struct btf_ext_sec_setup_param *ext_sec)
+{
+ const struct btf_ext_info_sec *sinfo;
+ struct btf_ext_info *ext_info;
+ __u32 info_left, record_size;
+ size_t sec_cnt = 0;
+ /* The start of the info sec (including the __u32 record_size). */
+ void *info;
+
+ if (ext_sec->len == 0)
+ return 0;
+
+ if (ext_sec->off & 0x03) {
+ pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n",
+ ext_sec->desc);
+ return -EINVAL;
+ }
+
+ info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off;
+ info_left = ext_sec->len;
+
+ if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) {
+ pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n",
+ ext_sec->desc, ext_sec->off, ext_sec->len);
+ return -EINVAL;
+ }
+
+ /* At least a record size */
+ if (info_left < sizeof(__u32)) {
+ pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc);
+ return -EINVAL;
+ }
+
+ /* The record size needs to meet the minimum standard */
+ record_size = *(__u32 *)info;
+ if (record_size < ext_sec->min_rec_size ||
+ record_size & 0x03) {
+ pr_debug("%s section in .BTF.ext has invalid record size %u\n",
+ ext_sec->desc, record_size);
+ return -EINVAL;
+ }
+
+ sinfo = info + sizeof(__u32);
+ info_left -= sizeof(__u32);
+
+ /* If no records, return failure now so .BTF.ext won't be used. */
+ if (!info_left) {
+ pr_debug("%s section in .BTF.ext has no records", ext_sec->desc);
+ return -EINVAL;
+ }
+
+ while (info_left) {
+ unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec);
+ __u64 total_record_size;
+ __u32 num_records;
+
+ if (info_left < sec_hdrlen) {
+ pr_debug("%s section header is not found in .BTF.ext\n",
+ ext_sec->desc);
+ return -EINVAL;
+ }
+
+ num_records = sinfo->num_info;
+ if (num_records == 0) {
+ pr_debug("%s section has incorrect num_records in .BTF.ext\n",
+ ext_sec->desc);
+ return -EINVAL;
+ }
+
+ total_record_size = sec_hdrlen + (__u64)num_records * record_size;
+ if (info_left < total_record_size) {
+ pr_debug("%s section has incorrect num_records in .BTF.ext\n",
+ ext_sec->desc);
+ return -EINVAL;
+ }
+
+ info_left -= total_record_size;
+ sinfo = (void *)sinfo + total_record_size;
+ sec_cnt++;
+ }
+
+ ext_info = ext_sec->ext_info;
+ ext_info->len = ext_sec->len - sizeof(__u32);
+ ext_info->rec_size = record_size;
+ ext_info->info = info + sizeof(__u32);
+ ext_info->sec_cnt = sec_cnt;
+
+ return 0;
+}
+
+static int btf_ext_setup_func_info(struct btf_ext *btf_ext)
+{
+ struct btf_ext_sec_setup_param param = {
+ .off = btf_ext->hdr->func_info_off,
+ .len = btf_ext->hdr->func_info_len,
+ .min_rec_size = sizeof(struct bpf_func_info_min),
+ .ext_info = &btf_ext->func_info,
+ .desc = "func_info"
+ };
+
+ return btf_ext_setup_info(btf_ext, ¶m);
+}
+
+static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
+{
+ struct btf_ext_sec_setup_param param = {
+ .off = btf_ext->hdr->line_info_off,
+ .len = btf_ext->hdr->line_info_len,
+ .min_rec_size = sizeof(struct bpf_line_info_min),
+ .ext_info = &btf_ext->line_info,
+ .desc = "line_info",
+ };
+
+ return btf_ext_setup_info(btf_ext, ¶m);
+}
+
+static int btf_ext_setup_core_relos(struct btf_ext *btf_ext)
+{
+ struct btf_ext_sec_setup_param param = {
+ .off = btf_ext->hdr->core_relo_off,
+ .len = btf_ext->hdr->core_relo_len,
+ .min_rec_size = sizeof(struct bpf_core_relo),
+ .ext_info = &btf_ext->core_relo_info,
+ .desc = "core_relo",
+ };
+
+ return btf_ext_setup_info(btf_ext, ¶m);
+}
+
+static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
+{
+
+ const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
+
+ if (data_size < offsetofend(struct btf_ext_header, hdr_len) ||
+ data_size < hdr->hdr_len) {
+ pr_debug("BTF.ext header not found");
+ return -EINVAL;
+ }
+
+ if (hdr->magic != BTF_MAGIC) {
+ pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic);
+ return -EINVAL;
+ }
+
+ if (hdr->version != BTF_VERSION) {
+ pr_debug("Unsupported BTF.ext version:%u\n", hdr->version);
+ return -EOPNOTSUPP;
+ }
+
+ if (hdr->flags) {
+ pr_debug("Unsupported BTF.ext flags:%x\n", hdr->flags);
+ return -EOPNOTSUPP;
+ }
+
+ if (data_size == hdr->hdr_len) {
+ pr_debug("BTF.ext has no data\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void btf_ext__free(struct btf_ext *btf_ext)
+{
+ if (IS_ERR_OR_NULL(btf_ext))
+ return;
+ kfree(btf_ext->func_info.sec_idxs);
+ kfree(btf_ext->line_info.sec_idxs);
+ kfree(btf_ext->core_relo_info.sec_idxs);
+ kfree(btf_ext->data);
+ kfree(btf_ext);
+}
+
+static struct btf_ext *btf_ext__new(const __u8 *data, __u32 size)
+{
+ struct btf_ext *btf_ext;
+ int err;
+
+ btf_ext = kzalloc(sizeof(struct btf_ext), GFP_KERNEL);
+ if (!btf_ext)
+ return ERR_PTR(-ENOMEM);
+
+ btf_ext->data_size = size;
+ btf_ext->data = kmalloc(size, GFP_KERNEL);
+ if (!btf_ext->data) {
+ err = -ENOMEM;
+ goto done;
+ }
+ memcpy(btf_ext->data, data, size);
+
+ err = btf_ext_parse_hdr(btf_ext->data, size);
+ if (err)
+ goto done;
+
+ if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ err = btf_ext_setup_func_info(btf_ext);
+ if (err)
+ goto done;
+
+ err = btf_ext_setup_line_info(btf_ext);
+ if (err)
+ goto done;
+
+ if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
+ goto done; /* skip core relos parsing */
+
+ err = btf_ext_setup_core_relos(btf_ext);
+ if (err)
+ goto done;
+
+done:
+ if (err) {
+ btf_ext__free(btf_ext);
+ return ERR_PTR(err);
+ }
+
+ return btf_ext;
+}
+
static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
--
2.47.1
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 04/14] bpf: Port elf and btf utility helper functions from userspace
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
` (2 preceding siblings ...)
2025-01-09 21:43 ` [PATCH 03/14] bpf: Port .btf.ext parsing functions from userspace Blaise Boscaccy
@ 2025-01-09 21:43 ` Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 05/14] fs/kernel_read_file: Add an eBPF specifier to kernel_read_file Blaise Boscaccy
` (11 subsequent siblings)
15 siblings, 0 replies; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-09 21:43 UTC (permalink / raw)
To: bpf; +Cc: nkapron, teknoraver, roberto.sassu, gregkh, paul, code, flaniel
This code is heavily borrowed from libbpf and is used in the
subsequent commits porting relocation functions from libbpf.
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
kernel/bpf/syscall.c | 110 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 110 insertions(+)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 907cc0b34f822..dc763772b55e5 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -5964,6 +5964,116 @@ static struct btf_ext *btf_ext__new(const __u8 *data, __u32 size)
return btf_ext;
}
+static int elf_sec_idx_by_name(struct bpf_obj *obj, const char *name)
+{
+ int i;
+ Elf_Shdr *shdr;
+
+ for (i = 1; i < obj->hdr->e_shnum; i++) {
+ shdr = &obj->sechdrs[i];
+ if (strcmp(name, obj->secstrings + shdr->sh_name) == 0)
+ return i;
+ }
+ return -ENOENT;
+}
+
+static const struct btf_var *btf_type_var(const struct btf_type *t)
+{
+ return (const struct btf_var *)(t + 1);
+}
+
+static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
+{
+ const struct btf_type *t;
+ const char *tname;
+ int i, n;
+
+ if (!btf)
+ return -ESRCH;
+
+ n = btf_type_cnt(btf);
+
+ for (i = 1; i < n; i++) {
+ t = btf_type_by_id(btf, i);
+
+ if (!btf_type_is_var(t) && !btf_type_is_func(t))
+ continue;
+
+ tname = btf_str_by_offset(btf, t->name_off);
+ if (strcmp(tname, ext_name))
+ continue;
+
+ if (btf_type_is_var(t) &&
+ btf_type_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
+ return -EINVAL;
+
+ if (btf_type_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
+ return -EINVAL;
+
+ return i;
+ }
+
+ return -ENOENT;
+}
+
+static inline struct btf_var_secinfo *
+btf_var_secinfos(const struct btf_type *t)
+{
+ return (struct btf_var_secinfo *)(t + 1);
+}
+
+static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
+{
+ const struct btf_var_secinfo *vs;
+ const struct btf_type *t;
+ int i, j, n;
+
+ if (!btf)
+ return -ESRCH;
+
+ n = btf_type_cnt(btf);
+ for (i = 1; i < n; i++) {
+ t = btf_type_by_id(btf, i);
+
+ if (!btf_type_is_datasec(t))
+ continue;
+
+ vs = btf_var_secinfos(t);
+ for (j = 0; j < btf_vlen(t); j++, vs++) {
+ if (vs->type == ext_btf_id)
+ return i;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static bool sym_is_extern(const Elf64_Sym *sym)
+{
+ int bind = ELF64_ST_BIND(sym->st_info);
+ /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
+ return sym->st_shndx == SHN_UNDEF &&
+ (bind == STB_GLOBAL || bind == STB_WEAK) &&
+ ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
+}
+
+static const struct btf_type *
+skip_mods_and_typedefs(const struct btf *btf, u32 id, u32 *res_id)
+{
+ const struct btf_type *t = btf_type_by_id(btf, id);
+
+ if (res_id)
+ *res_id = id;
+
+ while (btf_type_is_mod(t) || btf_type_is_typedef(t)) {
+ if (res_id)
+ *res_id = t->type;
+ t = btf_type_by_id(btf, t->type);
+ }
+
+ return t;
+}
+
static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
--
2.47.1
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 05/14] fs/kernel_read_file: Add an eBPF specifier to kernel_read_file
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
` (3 preceding siblings ...)
2025-01-09 21:43 ` [PATCH 04/14] bpf: Port elf and btf utility helper " Blaise Boscaccy
@ 2025-01-09 21:43 ` Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 06/14] bpf: Add BPF_LOAD_FD subcommand Blaise Boscaccy
` (10 subsequent siblings)
15 siblings, 0 replies; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-09 21:43 UTC (permalink / raw)
To: bpf; +Cc: nkapron, teknoraver, roberto.sassu, gregkh, paul, code, flaniel
This allows for BPF_LOAD_FD to integrate with the existing LSM
infrastructure to make policy decisions about loading eBPF objects.
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
include/linux/kernel_read_file.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/linux/kernel_read_file.h b/include/linux/kernel_read_file.h
index 90451e2e12bd1..39f9ed584df5a 100644
--- a/include/linux/kernel_read_file.h
+++ b/include/linux/kernel_read_file.h
@@ -14,6 +14,7 @@
id(KEXEC_INITRAMFS, kexec-initramfs) \
id(POLICY, security-policy) \
id(X509_CERTIFICATE, x509-certificate) \
+ id(EBPF, ebpf-object) \
id(MAX_ID, )
#define __fid_enumify(ENUM, dummy) READING_ ## ENUM,
--
2.47.1
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 06/14] bpf: Add BPF_LOAD_FD subcommand
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
` (4 preceding siblings ...)
2025-01-09 21:43 ` [PATCH 05/14] fs/kernel_read_file: Add an eBPF specifier to kernel_read_file Blaise Boscaccy
@ 2025-01-09 21:43 ` Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 07/14] bpf: Implement BPF_LOAD_FD subcommand handler Blaise Boscaccy
` (9 subsequent siblings)
15 siblings, 0 replies; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-09 21:43 UTC (permalink / raw)
To: bpf; +Cc: nkapron, teknoraver, roberto.sassu, gregkh, paul, code, flaniel
Here we define a new subcommand for the bpf syscall.
The new subcommand takes a file descriptor to a raw elf object file,
an array of file descriptors for maps created in userspace, and a
file descriptor pointing to a sysfs entry that is later used to store
relocated instructions.
Additionally some book-keeping data for kconfig and arena map offsets
is passed in along with file descriptors corresponding to any kernel
modules being used.
The basic strategy employed with BPF_LOAD_FD is to allow userspace
and libbpf to continue to operate as they do now with respect to maps,
while deferring all of the relocation to the kernel so that userspace
isn't required to process the program before loading.
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
include/uapi/linux/bpf.h | 28 ++++++++++++++++++++++++++++
kernel/bpf/syscall.c | 2 +-
tools/include/uapi/linux/bpf.h | 14 ++++++++++++++
3 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4162afc6b5d0d..6dd01db541c26 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -902,6 +902,20 @@ union bpf_iter_link_info {
* Return
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
+ * BPF_LOAD_FD
+ * Description
+ * Load a file descriptor corresponding to a raw elf object file
+ * into the kernel, and associate it with a sysfs entry. The
+ * kernel will then perform relocation calculations and instruction
+ * rewriting on behalf of the user.
+ *
+ * Programs contained in the elf file can later be loaded via
+ * BPF_PROG_LOAD, by passing in a sysfs file descriptor along with
+ * the symbol name of the program.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
*
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
@@ -958,6 +972,7 @@ enum bpf_cmd {
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
BPF_TOKEN_CREATE,
+ BPF_LOAD_FD,
__MAX_BPF_CMD,
};
@@ -1573,6 +1588,8 @@ union bpf_attr {
* If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
*/
__s32 prog_token_fd;
+ __s32 prog_loader_fd;
+ __aligned_u64 symbol_loader_name;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1827,6 +1844,17 @@ union bpf_attr {
__u32 bpffs_fd;
} token_create;
+ struct { /* struct used by BPF_PROG_LOAD command */
+ __u32 bpffs_fd;
+ __u32 obj_fd;
+ __aligned_u64 maps;
+ __u32 map_cnt;
+ __s32 kconfig_map_idx;
+ __s32 arena_map_idx;
+ __aligned_u64 modules;
+ __u32 module_cnt;
+ } load_fd;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index dc763772b55e5..37e45145e113b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2730,7 +2730,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
}
/* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd
+#define BPF_PROG_LOAD_LAST_FIELD symbol_loader_name
static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
{
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4162afc6b5d0d..89d47c8c43c79 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -958,6 +958,7 @@ enum bpf_cmd {
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
BPF_TOKEN_CREATE,
+ BPF_LOAD_FD,
__MAX_BPF_CMD,
};
@@ -1573,6 +1574,8 @@ union bpf_attr {
* If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
*/
__s32 prog_token_fd;
+ __s32 prog_loader_fd;
+ __aligned_u64 symbol_loader_name;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1827,6 +1830,17 @@ union bpf_attr {
__u32 bpffs_fd;
} token_create;
+ struct { /* struct used by BPF_PROG_LOAD command */
+ __u32 bpffs_fd;
+ __u32 obj_fd;
+ __aligned_u64 maps;
+ __u32 map_cnt;
+ __s32 kconfig_map_idx;
+ __s32 arena_map_idx;
+ __aligned_u64 modules;
+ __u32 module_cnt;
+ } load_fd;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
--
2.47.1
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 07/14] bpf: Implement BPF_LOAD_FD subcommand handler
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
` (5 preceding siblings ...)
2025-01-09 21:43 ` [PATCH 06/14] bpf: Add BPF_LOAD_FD subcommand Blaise Boscaccy
@ 2025-01-09 21:43 ` Blaise Boscaccy
2025-01-10 6:05 ` Greg KH
2025-01-11 0:41 ` kernel test robot
2025-01-09 21:43 ` [PATCH 08/14] bpf: Add elf parsing support to the BPF_LOAD_FD subcommand Blaise Boscaccy
` (8 subsequent siblings)
15 siblings, 2 replies; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-09 21:43 UTC (permalink / raw)
To: bpf; +Cc: nkapron, teknoraver, roberto.sassu, gregkh, paul, code, flaniel
The new LOAD_FD subcommand keys off of a sysfs entry file descriptor
and a file descriptor pointing to a raw elf object file.
After performing some sysfs bookkeeping, the object file is copied
into the kernel, along with map and module metadata arrays. Userspace
is expected to provide an array of file descriptors that correspond to
maps, along with module information, and offsets into the map array
that correspond with the arena allocator and the kconfig map if
applicable.
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
kernel/bpf/syscall.c | 242 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 242 insertions(+)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 37e45145e113b..3cfb497e1b236 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6074,6 +6074,245 @@ skip_mods_and_typedefs(const struct btf *btf, u32 id, u32 *res_id)
return t;
}
+static void free_bpf_obj(struct bpf_obj *obj)
+{
+ int i;
+
+ if (!obj)
+ return;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ kfree(obj->progs[i].insn);
+ kfree(obj->progs[i].reloc_desc);
+ }
+
+ kfree(obj->progs);
+ vfree(obj->hdr);
+
+ btf_put(obj->btf);
+ btf_put(obj->btf_vmlinux);
+ btf_ext__free(obj->btf_ext);
+
+ for (i = 0; i < obj->btf_modules_cnt; i++)
+ btf_put(obj->btf_modules[i].btf);
+
+ kfree(obj->btf_modules);
+ kfree(obj->externs);
+ kfree(obj->maps);
+}
+
+#define BPF_LOADER_INODE_NAME "bpf-loader"
+
+static const struct inode_operations bpf_loader_iops = { };
+
+static int bpf_loader_release(struct inode *inode, struct file *filp)
+{
+ struct bpf_obj *obj = filp->private_data;
+
+ free_bpf_obj(obj);
+ return 0;
+}
+
+static void bpf_loader_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+ int i;
+ struct bpf_obj *obj = filp->private_data;
+
+ for (i = 0; i < obj->nr_programs; i++)
+ seq_printf(m, "program: %s\n", obj->progs[i].name);
+}
+
+static const struct file_operations bpf_loader_fops = {
+ .release = bpf_loader_release,
+ .show_fdinfo = bpf_loader_show_fdinfo,
+};
+
+static int loader_create(unsigned int bpffs_fd)
+{
+ struct inode *inode;
+ struct bpf_obj *obj = NULL;
+ struct file *file;
+ struct path path;
+ struct fd f;
+ umode_t mode;
+ int err, fd;
+
+ f = fdget(bpffs_fd);
+ if (!fd_file(f))
+ return -EBADF;
+
+ path = fd_file(f)->f_path;
+ path_get(&path);
+ fdput(f);
+
+ if (path.dentry != path.mnt->mnt_sb->s_root) {
+ err = -EINVAL;
+ goto out_path;
+ }
+ if (path.mnt->mnt_sb->s_op != &bpf_super_ops) {
+ err = -EINVAL;
+ goto out_path;
+ }
+ err = path_permission(&path, MAY_ACCESS);
+ if (err)
+ goto out_path;
+
+ mode = S_IFREG | (0600 & ~current_umask());
+ inode = bpf_get_inode(path.mnt->mnt_sb, NULL, mode);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto out_path;
+ }
+
+ inode->i_op = &bpf_loader_iops;
+ inode->i_fop = &bpf_loader_fops;
+ clear_nlink(inode);
+
+ file = alloc_file_pseudo(inode, path.mnt, BPF_LOADER_INODE_NAME, O_RDWR, &bpf_loader_fops);
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ goto out_inode;
+ }
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj) {
+ err = -ENOMEM;
+ goto out_inode;
+ }
+
+ fd = get_unused_fd_flags(O_CLOEXEC);
+ if (fd < 0) {
+ err = fd;
+ kfree(obj);
+ goto out_inode;
+ }
+
+ file->private_data = obj;
+ fd_install(fd, file);
+ path_put(&path);
+ return fd;
+
+out_inode:
+ iput(inode);
+ fput(file);
+out_path:
+ path_put(&path);
+ return err;
+}
+
+static int load_fd(union bpf_attr *attr)
+{
+ void *buf = NULL;
+ int len;
+ int i;
+ int obj_f;
+ struct fd obj_fd;
+ struct bpf_module_obj *modules;
+ struct bpf_obj *obj;
+ int err;
+
+ struct fd f;
+ struct fd bpffs_fd;
+
+ f = fdget(attr->load_fd.obj_fd);
+ if (!fd_file(f)) {
+ err = -EBADF;
+ goto out;
+ }
+
+ bpffs_fd = fdget(attr->load_fd.bpffs_fd);
+ if (!fd_file(bpffs_fd)) {
+ fdput(f);
+ err = -EBADF;
+ goto out;
+ }
+
+ obj_f = loader_create(attr->load_fd.bpffs_fd);
+ if (obj_f < 0) {
+ err = obj_f;
+ fdput(f);
+ fdput(bpffs_fd);
+ goto out;
+ }
+
+ obj_fd = fdget(obj_f);
+ obj = fd_file(obj_fd)->private_data;
+
+ len = kernel_read_file(fd_file(f), 0, &buf, INT_MAX, NULL, READING_EBPF);
+ if (len < 0) {
+ fdput(obj_fd);
+ err = len;
+ goto out;
+ }
+
+ obj->hdr = buf;
+ obj->len = len;
+ obj->nr_maps = attr->load_fd.map_cnt;
+ obj->maps = kmalloc_array(attr->load_fd.map_cnt, sizeof(struct bpf_map_obj), GFP_KERNEL);
+
+ if (!obj->maps) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ if (attr->load_fd.map_cnt) {
+ if (copy_from_user(obj->maps, (const void *)attr->load_fd.maps,
+ sizeof(struct bpf_map_obj) * attr->load_fd.map_cnt) != 0) {
+ err = -EFAULT;
+ goto free;
+ }
+ }
+
+ obj->kconfig_map_idx = attr->load_fd.kconfig_map_idx;
+ obj->arena_map_idx = attr->load_fd.arena_map_idx;
+ obj->btf_vmlinux = bpf_get_btf_vmlinux();
+ modules = kmalloc_array(attr->load_fd.module_cnt,
+ sizeof(struct bpf_module_obj), GFP_KERNEL);
+
+ if (!modules) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+
+ if (attr->load_fd.module_cnt) {
+ if (copy_from_user(modules, (const void *)attr->load_fd.modules,
+ sizeof(struct bpf_module_obj) * attr->load_fd.module_cnt) != 0) {
+ err = -EFAULT;
+ goto free;
+ }
+ }
+
+ obj->btf_modules_cnt = attr->load_fd.module_cnt;
+ obj->btf_modules = kmalloc_array(attr->load_fd.module_cnt,
+ sizeof(struct bpf_module_btf), GFP_KERNEL);
+
+ if (!obj->btf_modules) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ for (i = 0; i < obj->btf_modules_cnt; i++) {
+ obj->btf_modules[i].fd = modules[i].fd;
+ obj->btf_modules[i].id = modules[i].id;
+ obj->btf_modules[i].fd_array_idx = modules[i].fd_array_idx;
+ obj->btf_modules[i].btf = btf_get_by_fd(obj->btf_modules[i].fd);
+ if (IS_ERR(obj->btf_modules[i].btf)) {
+ err = PTR_ERR(obj->btf_modules[i].btf);
+ kfree(modules);
+ goto free;
+ }
+ }
+ kfree(modules);
+
+ return obj_f;
+free:
+ free_bpf_obj(obj);
+ fd_file(obj_fd)->private_data = NULL;
+out:
+ return err;
+}
+
static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
@@ -6210,6 +6449,9 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
case BPF_TOKEN_CREATE:
err = token_create(&attr);
break;
+ case BPF_LOAD_FD:
+ err = load_fd(&attr);
+ break;
default:
err = -EINVAL;
break;
--
2.47.1
^ permalink raw reply related [flat|nested] 30+ messages in thread* Re: [PATCH 07/14] bpf: Implement BPF_LOAD_FD subcommand handler
2025-01-09 21:43 ` [PATCH 07/14] bpf: Implement BPF_LOAD_FD subcommand handler Blaise Boscaccy
@ 2025-01-10 6:05 ` Greg KH
2025-01-10 22:41 ` Blaise Boscaccy
2025-01-11 0:41 ` kernel test robot
1 sibling, 1 reply; 30+ messages in thread
From: Greg KH @ 2025-01-10 6:05 UTC (permalink / raw)
To: Blaise Boscaccy
Cc: bpf, nkapron, teknoraver, roberto.sassu, paul, code, flaniel
On Thu, Jan 09, 2025 at 01:43:49PM -0800, Blaise Boscaccy wrote:
> The new LOAD_FD subcommand keys off of a sysfs entry file descriptor
> and a file descriptor pointing to a raw elf object file.
A sysfs file descriptor? That feels very odd and is not how sysfs
should be used, as it's only for text files and binary pass-through
stuff.
> +static void bpf_loader_show_fdinfo(struct seq_file *m, struct file *filp)
> +{
> + int i;
> + struct bpf_obj *obj = filp->private_data;
> +
> + for (i = 0; i < obj->nr_programs; i++)
> + seq_printf(m, "program: %s\n", obj->progs[i].name);
So what file is printing this out in sysfs? Where is the
Documentation/ABI/ entry for it?
confused,
greg k-h
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 07/14] bpf: Implement BPF_LOAD_FD subcommand handler
2025-01-10 6:05 ` Greg KH
@ 2025-01-10 22:41 ` Blaise Boscaccy
0 siblings, 0 replies; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-10 22:41 UTC (permalink / raw)
To: Greg KH; +Cc: bpf, nkapron, teknoraver, roberto.sassu, paul, code, flaniel
Hi Greg,
Greg KH <gregkh@linuxfoundation.org> writes:
> On Thu, Jan 09, 2025 at 01:43:49PM -0800, Blaise Boscaccy wrote:
>> The new LOAD_FD subcommand keys off of a sysfs entry file descriptor
>> and a file descriptor pointing to a raw elf object file.
>
> A sysfs file descriptor? That feels very odd and is not how sysfs
> should be used, as it's only for text files and binary pass-through
> stuff.
>
Yeah, libbpf has a feature where it can load multiple independent
ebpf programs from a single object file. It parses the whole object file
and then for each program, calls BPF_PROG_LOAD. I was trying to mimic that
flow here, by having a single call to BPF_LOAD_FD and allowing
userspace to repeatedly call BPF_PROG_LOAD as needed referencing that
result.
bpffs would probably be a more appropriate choice for this. The purpose
of the PoC was mostly to test whether or not kernel relocs were even doable
and if there was any support for it upstream. The interface could
definitely use some polishing.
I'm also not sure how pervasive that use case is in the wild and if it is
more of a premature optimization here than anything. Alternatively, it
may be acceptable to combine BPF_LOAD_FD and BPF_PROG_LOAD into a single
operation and reparse/relocate for each discrete program load and then
remove all this.
>> +static void bpf_loader_show_fdinfo(struct seq_file *m, struct file *filp)
>> +{
>> + int i;
>> + struct bpf_obj *obj = filp->private_data;
>> +
>> + for (i = 0; i < obj->nr_programs; i++)
>> + seq_printf(m, "program: %s\n", obj->progs[i].name);
>
> So what file is printing this out in sysfs?
There are two file descriptors passed into BPF_LOAD_FD, this uses the
first one (bpffs_fd).
> Where is the
> Documentation/ABI/ entry for it?
>
That's still a TODO and an oversight on my part.
> confused,
>
> greg k-h
Thanks for the feedback.
-blaise
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 07/14] bpf: Implement BPF_LOAD_FD subcommand handler
2025-01-09 21:43 ` [PATCH 07/14] bpf: Implement BPF_LOAD_FD subcommand handler Blaise Boscaccy
2025-01-10 6:05 ` Greg KH
@ 2025-01-11 0:41 ` kernel test robot
1 sibling, 0 replies; 30+ messages in thread
From: kernel test robot @ 2025-01-11 0:41 UTC (permalink / raw)
To: Blaise Boscaccy, bpf
Cc: oe-kbuild-all, nkapron, teknoraver, roberto.sassu, gregkh, paul,
code, flaniel
Hi Blaise,
kernel test robot noticed the following build warnings:
[auto build test WARNING on bpf/master]
[also build test WARNING on linus/master v6.13-rc6]
[cannot apply to bpf-next/master next-20250110]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Blaise-Boscaccy/bpf-Add-data-structures-for-managing-in-kernel-eBPF-relocations/20250110-064354
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git master
patch link: https://lore.kernel.org/r/20250109214617.485144-8-bboscaccy%40linux.microsoft.com
patch subject: [PATCH 07/14] bpf: Implement BPF_LOAD_FD subcommand handler
config: i386-buildonly-randconfig-003-20250111 (https://download.01.org/0day-ci/archive/20250111/202501110812.QzSvbAtK-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250111/202501110812.QzSvbAtK-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501110812.QzSvbAtK-lkp@intel.com/
All warnings (new ones prefixed by >>):
kernel/bpf/syscall.c: In function 'load_fd':
>> kernel/bpf/syscall.c:6290:47: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
6290 | if (copy_from_user(obj->maps, (const void *)attr->load_fd.maps,
| ^
kernel/bpf/syscall.c:6310:45: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
6310 | if (copy_from_user(modules, (const void *)attr->load_fd.modules,
| ^
kernel/bpf/syscall.c: At top level:
kernel/bpf/syscall.c:6092:1: warning: 'skip_mods_and_typedefs' defined but not used [-Wunused-function]
6092 | skip_mods_and_typedefs(const struct btf *btf, u32 id, u32 *res_id)
| ^~~~~~~~~~~~~~~~~~~~~~
kernel/bpf/syscall.c:6082:13: warning: 'sym_is_extern' defined but not used [-Wunused-function]
6082 | static bool sym_is_extern(const Elf64_Sym *sym)
| ^~~~~~~~~~~~~
kernel/bpf/syscall.c:6056:12: warning: 'find_extern_sec_btf_id' defined but not used [-Wunused-function]
6056 | static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
| ^~~~~~~~~~~~~~~~~~~~~~
kernel/bpf/syscall.c:6016:12: warning: 'find_extern_btf_id' defined but not used [-Wunused-function]
6016 | static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
| ^~~~~~~~~~~~~~~~~~
kernel/bpf/syscall.c:5998:12: warning: 'elf_sec_idx_by_name' defined but not used [-Wunused-function]
5998 | static int elf_sec_idx_by_name(struct bpf_obj *obj, const char *name)
| ^~~~~~~~~~~~~~~~~~~
kernel/bpf/syscall.c:5948:24: warning: 'btf_ext__new' defined but not used [-Wunused-function]
5948 | static struct btf_ext *btf_ext__new(const __u8 *data, __u32 size)
| ^~~~~~~~~~~~
vim +6290 kernel/bpf/syscall.c
6233
6234 static int load_fd(union bpf_attr *attr)
6235 {
6236 void *buf = NULL;
6237 int len;
6238 int i;
6239 int obj_f;
6240 struct fd obj_fd;
6241 struct bpf_module_obj *modules;
6242 struct bpf_obj *obj;
6243 int err;
6244
6245 struct fd f;
6246 struct fd bpffs_fd;
6247
6248 f = fdget(attr->load_fd.obj_fd);
6249 if (!fd_file(f)) {
6250 err = -EBADF;
6251 goto out;
6252 }
6253
6254 bpffs_fd = fdget(attr->load_fd.bpffs_fd);
6255 if (!fd_file(bpffs_fd)) {
6256 fdput(f);
6257 err = -EBADF;
6258 goto out;
6259 }
6260
6261 obj_f = loader_create(attr->load_fd.bpffs_fd);
6262 if (obj_f < 0) {
6263 err = obj_f;
6264 fdput(f);
6265 fdput(bpffs_fd);
6266 goto out;
6267 }
6268
6269 obj_fd = fdget(obj_f);
6270 obj = fd_file(obj_fd)->private_data;
6271
6272 len = kernel_read_file(fd_file(f), 0, &buf, INT_MAX, NULL, READING_EBPF);
6273 if (len < 0) {
6274 fdput(obj_fd);
6275 err = len;
6276 goto out;
6277 }
6278
6279 obj->hdr = buf;
6280 obj->len = len;
6281 obj->nr_maps = attr->load_fd.map_cnt;
6282 obj->maps = kmalloc_array(attr->load_fd.map_cnt, sizeof(struct bpf_map_obj), GFP_KERNEL);
6283
6284 if (!obj->maps) {
6285 err = -ENOMEM;
6286 goto free;
6287 }
6288
6289 if (attr->load_fd.map_cnt) {
> 6290 if (copy_from_user(obj->maps, (const void *)attr->load_fd.maps,
6291 sizeof(struct bpf_map_obj) * attr->load_fd.map_cnt) != 0) {
6292 err = -EFAULT;
6293 goto free;
6294 }
6295 }
6296
6297 obj->kconfig_map_idx = attr->load_fd.kconfig_map_idx;
6298 obj->arena_map_idx = attr->load_fd.arena_map_idx;
6299 obj->btf_vmlinux = bpf_get_btf_vmlinux();
6300 modules = kmalloc_array(attr->load_fd.module_cnt,
6301 sizeof(struct bpf_module_obj), GFP_KERNEL);
6302
6303 if (!modules) {
6304 err = -ENOMEM;
6305 goto free;
6306 }
6307
6308
6309 if (attr->load_fd.module_cnt) {
6310 if (copy_from_user(modules, (const void *)attr->load_fd.modules,
6311 sizeof(struct bpf_module_obj) * attr->load_fd.module_cnt) != 0) {
6312 err = -EFAULT;
6313 goto free;
6314 }
6315 }
6316
6317 obj->btf_modules_cnt = attr->load_fd.module_cnt;
6318 obj->btf_modules = kmalloc_array(attr->load_fd.module_cnt,
6319 sizeof(struct bpf_module_btf), GFP_KERNEL);
6320
6321 if (!obj->btf_modules) {
6322 err = -ENOMEM;
6323 goto free;
6324 }
6325
6326 for (i = 0; i < obj->btf_modules_cnt; i++) {
6327 obj->btf_modules[i].fd = modules[i].fd;
6328 obj->btf_modules[i].id = modules[i].id;
6329 obj->btf_modules[i].fd_array_idx = modules[i].fd_array_idx;
6330 obj->btf_modules[i].btf = btf_get_by_fd(obj->btf_modules[i].fd);
6331 if (IS_ERR(obj->btf_modules[i].btf)) {
6332 err = PTR_ERR(obj->btf_modules[i].btf);
6333 kfree(modules);
6334 goto free;
6335 }
6336 }
6337 kfree(modules);
6338
6339 return obj_f;
6340 free:
6341 free_bpf_obj(obj);
6342 fd_file(obj_fd)->private_data = NULL;
6343 out:
6344 return err;
6345 }
6346
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 08/14] bpf: Add elf parsing support to the BPF_LOAD_FD subcommand
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
` (6 preceding siblings ...)
2025-01-09 21:43 ` [PATCH 07/14] bpf: Implement BPF_LOAD_FD subcommand handler Blaise Boscaccy
@ 2025-01-09 21:43 ` Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 09/14] bpf: Collect extern relocations Blaise Boscaccy
` (7 subsequent siblings)
15 siblings, 0 replies; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-09 21:43 UTC (permalink / raw)
To: bpf; +Cc: nkapron, teknoraver, roberto.sassu, gregkh, paul, code, flaniel
Using the sysfs entry passed into the subcommand, the previously loaded
elf object file is parsed. The objective of this parse is to identify
key elf file sections, specifically the text and btf sections. From
there, indices are saved to relevant sections. Armed with the initial
parse info, we search for and create program definitions, along with
any respective btf information for them.
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
kernel/bpf/syscall.c | 175 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 175 insertions(+)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 3cfb497e1b236..03ab0bb7bf076 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6074,6 +6074,177 @@ skip_mods_and_typedefs(const struct btf *btf, u32 id, u32 *res_id)
return t;
}
+static int init_btf(struct bpf_obj *obj, unsigned int btf_idx, unsigned int btf_ext_idx)
+{
+ Elf_Shdr *shdr = &obj->sechdrs[btf_idx];
+ void *buffer = (void *)obj->hdr + shdr->sh_offset;
+ struct btf_ext_info *ext_segs[3];
+ int seg_num, sec_num;
+ int idx;
+ struct btf_ext_info *seg;
+ const struct btf_ext_info_sec *sec;
+ const char *sec_name;
+ struct btf *btf = btf_init_mem(buffer, shdr->sh_size, 0, 0, 0);
+
+ obj->btf = btf;
+ shdr = &obj->sechdrs[btf_ext_idx];
+ buffer = (void *)obj->hdr + shdr->sh_offset;
+ obj->btf_ext = btf_ext__new(buffer, shdr->sh_size);
+ obj->index.btf = btf_idx;
+ obj->index.btf_ext = btf_ext_idx;
+
+ /* setup .BTF.ext to ELF section mapping */
+ ext_segs[0] = &obj->btf_ext->func_info;
+ ext_segs[1] = &obj->btf_ext->line_info;
+ ext_segs[2] = &obj->btf_ext->core_relo_info;
+ for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
+ seg = ext_segs[seg_num];
+
+ if (seg->sec_cnt == 0)
+ continue;
+
+ seg->sec_idxs = kcalloc(seg->sec_cnt, sizeof(*seg->sec_idxs), GFP_KERNEL);
+ if (!seg->sec_idxs)
+ return -ENOMEM;
+
+ sec_num = 0;
+ for_each_btf_ext_sec(seg, sec) {
+ /* preventively increment index to avoid doing
+ * this before every continue below
+ */
+ sec_num++;
+
+ sec_name = btf_str_by_offset(obj->btf, sec->sec_name_off);
+ if (str_is_empty(sec_name))
+ continue;
+
+ idx = elf_sec_idx_by_name(obj, sec_name);
+ if (idx < 0)
+ continue;
+ seg->sec_idxs[sec_num - 1] = idx;
+ }
+ }
+ return 0;
+}
+
+static int find_progs(struct bpf_obj *obj, unsigned int sec_idx)
+{
+ unsigned int i;
+ unsigned int prog_sz;
+ unsigned int sec_off;
+ Elf_Shdr *symsec = &obj->sechdrs[obj->index.sym];
+ Elf_Sym *sym = (void *)obj->hdr + symsec->sh_offset;
+ Elf_Shdr *shdr = &obj->sechdrs[sec_idx];
+ struct bpf_prog_obj *progs;
+ int err;
+ struct bpf_insn *insns = NULL;
+ void *buffer;
+ unsigned int insn_cnt, ndx;
+ char *name;
+
+ for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) {
+ name = obj->strtab + sym[i].st_name;
+
+ if (sym[i].st_shndx != sec_idx)
+ continue;
+ if (ELF64_ST_TYPE(sym[i].st_info) != STT_FUNC)
+ continue;
+
+ prog_sz = sym[i].st_size;
+ sec_off = sym[i].st_value;
+ buffer = (void *)obj->hdr + shdr->sh_offset + sec_off;
+
+ insns = kmalloc(prog_sz, GFP_KERNEL);
+ if (!insns)
+ return -ENOMEM;
+
+ memcpy(insns, buffer, prog_sz);
+ insn_cnt = prog_sz / sizeof(struct bpf_insn);
+
+ progs = krealloc_array(obj->progs, obj->nr_programs + 1,
+ sizeof(struct bpf_prog_obj), GFP_KERNEL);
+ if (!progs) {
+ err = -ENOMEM;
+ goto free_insns;
+ }
+
+ obj->progs = progs;
+ ndx = obj->nr_programs;
+ obj->progs[ndx].insn = insns;
+ obj->progs[ndx].insn_cnt = insn_cnt;
+ obj->progs[ndx].sec_idx = sec_idx;
+ obj->progs[ndx].sec_insn_off = sec_off / sizeof(struct bpf_insn);
+ obj->progs[ndx].sec_insn_cnt = insn_cnt;
+ obj->progs[ndx].name = name;
+ obj->progs[ndx].exception_cb_idx = -1;
+ obj->nr_programs++;
+
+ }
+ return 0;
+
+free_insns:
+ kfree(insns);
+ return err;
+}
+
+static int elf_collect(struct bpf_obj *obj)
+{
+ unsigned int i;
+ Elf_Shdr *shdr, *strhdr;
+ unsigned int sym_idx;
+ unsigned int sec_idx = 0;
+ unsigned int btf_idx = 0, btf_ext_idx = 0;
+ int err = 0;
+
+ obj->sechdrs = (void *)obj->hdr + obj->hdr->e_shoff;
+ strhdr = &obj->sechdrs[obj->hdr->e_shstrndx];
+ obj->secstrings = (void *)obj->hdr + strhdr->sh_offset;
+
+ for (i = 1; i < obj->hdr->e_shnum; i++) {
+ shdr = &obj->sechdrs[i];
+ switch (shdr->sh_type) {
+ case SHT_NULL:
+ case SHT_NOBITS:
+ continue;
+ case SHT_SYMTAB:
+ sym_idx = i;
+ fallthrough;
+ default:
+ break;
+ }
+ }
+
+ obj->index.sym = sym_idx;
+ shdr = &obj->sechdrs[sym_idx];
+ obj->index.str = shdr->sh_link;
+ obj->strtab = (char *)obj->hdr + obj->sechdrs[obj->index.str].sh_offset;
+
+ for (i = 1; i < obj->hdr->e_shnum; i++) {
+ shdr = &obj->sechdrs[i];
+ sec_idx = i;
+ if (strcmp(".text", obj->secstrings + shdr->sh_name) == 0)
+ obj->index.text = sec_idx;
+
+ if (shdr->sh_type == SHT_PROGBITS && shdr->sh_size > 0) {
+ err = find_progs(obj, sec_idx);
+ if (err)
+ return err;
+ }
+
+ if (strcmp(".BTF", obj->secstrings + shdr->sh_name) == 0)
+ btf_idx = i;
+
+ if (strcmp(".BTF.ext", obj->secstrings + shdr->sh_name) == 0)
+ btf_ext_idx = i;
+
+ if (strcmp(".addr_space.1", obj->secstrings + shdr->sh_name) == 0)
+ obj->index.arena = sec_idx;
+ }
+
+ err = init_btf(obj, btf_idx, btf_ext_idx);
+ return err;
+}
+
static void free_bpf_obj(struct bpf_obj *obj)
{
int i;
@@ -6305,6 +6476,10 @@ static int load_fd(union bpf_attr *attr)
}
kfree(modules);
+ err = elf_collect(obj);
+ if (err < 0)
+ goto free;
+
return obj_f;
free:
free_bpf_obj(obj);
--
2.47.1
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 09/14] bpf: Collect extern relocations
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
` (7 preceding siblings ...)
2025-01-09 21:43 ` [PATCH 08/14] bpf: Add elf parsing support to the BPF_LOAD_FD subcommand Blaise Boscaccy
@ 2025-01-09 21:43 ` Blaise Boscaccy
2025-01-11 1:35 ` kernel test robot
2025-01-09 21:43 ` [PATCH 10/14] bpf: Implement BTF fixup functionality Blaise Boscaccy
` (6 subsequent siblings)
15 siblings, 1 reply; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-09 21:43 UTC (permalink / raw)
To: bpf; +Cc: nkapron, teknoraver, roberto.sassu, gregkh, paul, code, flaniel
This code heavily borrows from bpf_object__collect_externs in
libbpf. Here we walk the symbol table and attempt to determine which
symbols correspond to external relocations, specifically kconfig
options and kernel or module symbols.
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
kernel/bpf/syscall.c | 337 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 337 insertions(+)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 03ab0bb7bf076..51b14cb9c4ca1 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6245,6 +6245,339 @@ static int elf_collect(struct bpf_obj *obj)
return err;
}
+/* Classify the BTF type of a .kconfig extern into a bpf_kcfg_type
+ * (bool / tristate / char / int / char array), ported from libbpf's
+ * find_kcfg_type(). Mods and typedefs are skipped first. Returns
+ * KCFG_UNKNOWN for any unsupported shape. If @is_signed is non-NULL
+ * it is set to true only for signed BTF_KIND_INT types.
+ */
+static enum bpf_kcfg_type find_kcfg_type(const struct btf *btf, int id,
+ bool *is_signed)
+{
+ const struct btf_type *t;
+ const char *name;
+
+ t = skip_mods_and_typedefs(btf, id, NULL);
+ name = btf_str_by_offset(btf, t->name_off);
+
+ if (is_signed)
+ *is_signed = false;
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT: {
+ int enc = btf_int_encoding(t);
+
+ /* bool must be exactly one byte */
+ if (enc & BTF_INT_BOOL)
+ return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
+ if (is_signed)
+ *is_signed = enc & BTF_INT_SIGNED;
+ if (t->size == 1)
+ return KCFG_CHAR;
+ /* only power-of-2 int sizes in 1..8 bytes are supported */
+ if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
+ return KCFG_UNKNOWN;
+ return KCFG_INT;
+ }
+ case BTF_KIND_ENUM:
+ /* only the 4-byte libbpf_tristate enum is recognized */
+ if (t->size != 4)
+ return KCFG_UNKNOWN;
+ if (strcmp(name, "libbpf_tristate"))
+ return KCFG_UNKNOWN;
+ return KCFG_TRISTATE;
+ case BTF_KIND_ENUM64:
+ if (strcmp(name, "libbpf_tristate"))
+ return KCFG_UNKNOWN;
+ return KCFG_TRISTATE;
+ case BTF_KIND_ARRAY:
+ /* arrays are supported only as non-empty char arrays */
+ if (btf_array(t)->nelems == 0)
+ return KCFG_UNKNOWN;
+ if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
+ return KCFG_UNKNOWN;
+ return KCFG_CHAR_ARR;
+ default:
+ return KCFG_UNKNOWN;
+ }
+}
+
+/* sort() comparator for bpf_extern_desc entries: group externs by
+ * type first; within EXT_KCFG order by descending alignment, then
+ * ascending size, so that .kconfig map layout packs tightly; resolve
+ * all remaining ties by name for a deterministic order.
+ */
+static int cmp_externs(const void *_a, const void *_b)
+{
+ const struct bpf_extern_desc *a = _a;
+ const struct bpf_extern_desc *b = _b;
+
+ if (a->type != b->type)
+ return a->type < b->type ? -1 : 1;
+
+ if (a->type == EXT_KCFG) {
+ /* descending order by alignment requirements */
+ if (a->kcfg.align != b->kcfg.align)
+ return a->kcfg.align > b->kcfg.align ? -1 : 1;
+ /* ascending order by size, within same alignment class */
+ if (a->kcfg.sz != b->kcfg.sz)
+ return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
+ }
+
+ /* resolve ties by name */
+ return strcmp(a->name, b->name);
+}
+
+/* Return the BTF type id of the first 32-bit BTF_KIND_INT type, or 0
+ * if none exists. Used below as the stand-in type for fake ksym
+ * variables and the dummy ksym var.
+ */
+static int find_int_btf_id(const struct btf *btf)
+{
+ const struct btf_type *t;
+ int i, n;
+
+ n = btf_type_cnt(btf);
+ for (i = 1; i < n; i++) {
+ t = btf_type_by_id(btf, i);
+
+ if (btf_type_is_int(t) && btf_type_int_bits(t) == 32)
+ return i;
+ }
+ return 0;
+}
+
+/* Linear search of obj->externs for the descriptor whose name matches
+ * @name; returns NULL when no extern with that name was collected.
+ */
+static struct bpf_extern_desc *find_extern_by_name(const struct bpf_obj *obj,
+ const void *name)
+{
+ int i;
+
+ for (i = 0; i < obj->nr_extern; i++) {
+ if (strcmp(obj->externs[i].name, name) == 0)
+ return &obj->externs[i];
+ }
+ return NULL;
+}
+
+/* If the .ksyms DATASEC contains any extern function, add a dummy BTF
+ * VAR (reusing the DATASEC's own name string) that collect_externs()
+ * will later substitute for the function's var_secinfo entry.
+ * Returns the new var's BTF id, 0 when no BTF / no .ksyms DATASEC /
+ * no function externs exist, or a negative error from btf_add_var().
+ */
+static int add_dummy_ksym_var(struct btf *btf)
+{
+ int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
+ const struct btf_var_secinfo *vs;
+ const struct btf_type *sec;
+
+ if (!btf)
+ return 0;
+
+ sec_btf_id = btf_find_by_name_kind(btf, ".ksyms",
+ BTF_KIND_DATASEC);
+ if (sec_btf_id < 0)
+ return 0;
+
+ /* scan the DATASEC's members for at least one FUNC entry */
+ sec = btf_type_by_id(btf, sec_btf_id);
+ vs = btf_var_secinfos(sec);
+ for (i = 0; i < btf_vlen(sec); i++, vs++) {
+ const struct btf_type *vt;
+
+ vt = btf_type_by_id(btf, vs->type);
+ if (btf_type_is_func(vt))
+ break;
+ }
+
+ /* No func in ksyms sec. No need to add dummy var. */
+ if (i == btf_vlen(sec))
+ return 0;
+
+ int_btf_id = find_int_btf_id(btf);
+
+ dummy_var_btf_id = btf_add_var(btf,
+ sec->name_off,
+ BTF_VAR_GLOBAL_ALLOCATED,
+ int_btf_id);
+ if (dummy_var_btf_id < 0)
+ pr_warn("cannot create a dummy_ksym var\n");
+
+ return dummy_var_btf_id;
+}
+
+/* Walk the ELF symbol table and build obj->externs: one descriptor
+ * per extern symbol, classified as EXT_KCFG (.kconfig section) or
+ * EXT_KSYM (.ksyms section). Afterwards, lay out kcfg externs within
+ * the future .kconfig map and rewrite ksym VAR/FUNC entries of the
+ * .ksyms DATASEC. Ported from libbpf's bpf_object__collect_externs().
+ *
+ * Fixes vs v1: use Elf64_Sym explicitly (sym_is_extern() takes
+ * const Elf64_Sym *, and the generic Elf_Sym is Elf32_Sym on 32-bit
+ * builds, as reported by the kernel test robot), and read the
+ * binding of the *current* symbol, sym[i].st_info — sym->st_info
+ * tested symbol 0 for every extern.
+ */
+static int collect_externs(struct bpf_obj *obj)
+{
+ int i, n, off, dummy_var_btf_id;
+ Elf_Shdr *symsec = &obj->sechdrs[obj->index.sym];
+ Elf64_Sym *sym = (void *)obj->hdr + symsec->sh_offset;
+ const char *ext_name;
+ const char *sec_name;
+ struct bpf_extern_desc *ext;
+ const struct btf_type *t;
+ size_t ext_essent_len;
+ struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
+ int size;
+ int int_btf_id;
+ const struct btf_type *dummy_var;
+ struct btf_type *vt;
+ struct btf_var_secinfo *vs;
+ const struct btf_type *func_proto;
+ struct btf_param *param;
+ int j;
+
+ dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
+ if (dummy_var_btf_id < 0)
+ return dummy_var_btf_id;
+
+ for (i = 1; i < symsec->sh_size / sizeof(Elf64_Sym); i++) {
+ if (!sym_is_extern(&sym[i]))
+ continue;
+
+ ext_name = obj->strtab + sym[i].st_name;
+ ext = krealloc_array(obj->externs,
+ obj->nr_extern + 1,
+ sizeof(struct bpf_extern_desc),
+ GFP_KERNEL);
+ if (!ext)
+ return -ENOMEM;
+
+ obj->externs = ext;
+ ext = &ext[obj->nr_extern];
+ memset(ext, 0, sizeof(*ext));
+ obj->nr_extern++;
+
+ ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
+ /* NOTE(review): a 0 return propagates "success" to the
+ * caller — confirm find_extern_btf_id() returns a negative
+ * error when the extern has no BTF entry.
+ */
+ if (ext->btf_id <= 0)
+ return ext->btf_id;
+
+ t = btf_type_by_id(obj->btf, ext->btf_id);
+ ext->name = btf_str_by_offset(obj->btf, t->name_off);
+ ext->sym_idx = i;
+ /* bug fix: inspect the current symbol, not sym[0] */
+ ext->is_weak = ELF64_ST_BIND(sym[i].st_info) == STB_WEAK;
+
+ /* strip any "___suffix" flavor to get the essential name */
+ ext_essent_len = bpf_core_essential_name_len(ext->name);
+ ext->essent_name = NULL;
+ if (ext_essent_len != strlen(ext->name)) {
+ ext->essent_name = kstrndup(ext->name, ext_essent_len, GFP_KERNEL);
+ if (!ext->essent_name)
+ return -ENOMEM;
+ }
+
+ ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
+ if (ext->sec_btf_id <= 0) {
+ pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
+ ext_name, ext->btf_id, ext->sec_btf_id);
+ return ext->sec_btf_id;
+ }
+
+ sec = (void *)btf_type_by_id(obj->btf, ext->sec_btf_id);
+ sec_name = btf_str_by_offset(obj->btf, sec->name_off);
+
+ if (strcmp(sec_name, ".kconfig") == 0) {
+ if (btf_type_is_func(t)) {
+ pr_warn("extern function %s is unsupported under .kconfig section\n",
+ ext->name);
+ return -EOPNOTSUPP;
+ }
+ kcfg_sec = sec;
+ ext->type = EXT_KCFG;
+
+ /* NOTE(review): if this is the in-kernel
+ * btf_resolve_size(), it returns an ERR_PTR rather
+ * than NULL on failure — verify this error check.
+ */
+ if (!btf_resolve_size(obj->btf, t, &size)) {
+ pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
+ ext_name, ext->kcfg.sz);
+ return -EINVAL;
+ }
+ ext->kcfg.sz = size;
+ ext->kcfg.align = btf_align_of(obj->btf, t->type);
+ if (ext->kcfg.align <= 0) {
+ pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
+ ext_name, ext->kcfg.align);
+ return -EINVAL;
+ }
+ ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
+ &ext->kcfg.is_signed);
+ if (ext->kcfg.type == KCFG_UNKNOWN) {
+ pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
+ return -EOPNOTSUPP;
+ }
+ } else if (strcmp(sec_name, ".ksyms") == 0) {
+ ksym_sec = sec;
+ ext->type = EXT_KSYM;
+ skip_mods_and_typedefs(obj->btf, t->type,
+ &ext->ksym.type_id);
+ } else {
+ pr_warn("unrecognized extern section '%s'\n", sec_name);
+ return -EOPNOTSUPP;
+ }
+ }
+
+ sort(obj->externs, obj->nr_extern, sizeof(struct bpf_extern_desc),
+ cmp_externs, NULL);
+
+ if (ksym_sec) {
+ /* find existing 4-byte integer type in BTF to use for fake
+ * extern variables in DATASEC
+ */
+ int_btf_id = find_int_btf_id(obj->btf);
+
+ /* For extern function, a dummy_var added earlier
+ * will be used to replace the vs->type and
+ * its name string will be used to refill
+ * the missing param's name.
+ */
+ dummy_var = btf_type_by_id(obj->btf, dummy_var_btf_id);
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (ext->type != EXT_KSYM)
+ continue;
+ pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
+ i, ext->sym_idx, ext->name);
+ }
+
+ sec = ksym_sec;
+ n = btf_vlen(sec);
+ for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
+ vs = btf_var_secinfos(sec) + i;
+ vt = (void *)btf_type_by_id(obj->btf, vs->type);
+ ext_name = btf_str_by_offset(obj->btf, vt->name_off);
+ ext = find_extern_by_name(obj, ext_name);
+ if (!ext) {
+ pr_warn("failed to find extern definition for BTF %s\n",
+ ext_name);
+ return -ESRCH;
+ }
+ if (btf_type_is_func(vt)) {
+ func_proto = btf_type_by_id(obj->btf,
+ vt->type);
+ param = btf_params(func_proto);
+ /* Reuse the dummy_var string if the
+ * func proto does not have param name.
+ */
+ for (j = 0; j < btf_vlen(func_proto); j++)
+ if (param[j].type && !param[j].name_off)
+ param[j].name_off =
+ dummy_var->name_off;
+ vs->type = dummy_var_btf_id;
+ vt->info &= ~0xffff;
+ vt->info |= BTF_FUNC_GLOBAL;
+ } else {
+ btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ vt->type = int_btf_id;
+ }
+ vs->offset = off;
+ vs->size = sizeof(int);
+ }
+ sec->size = off;
+ }
+
+ if (kcfg_sec) {
+ sec = kcfg_sec;
+ /* for kcfg externs calculate their offsets within a .kconfig map */
+ off = 0;
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (ext->type != EXT_KCFG)
+ continue;
+
+ ext->kcfg.data_off = roundup(off, ext->kcfg.align);
+ off = ext->kcfg.data_off + ext->kcfg.sz;
+ pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
+ i, ext->sym_idx, ext->kcfg.data_off, ext->name);
+ }
+ sec->size = off;
+ n = btf_vlen(sec);
+ for (i = 0; i < n; i++) {
+ vs = btf_var_secinfos(sec) + i;
+ t = btf_type_by_id(obj->btf, vs->type);
+ ext_name = btf_str_by_offset(obj->btf, t->name_off);
+
+ ext = find_extern_by_name(obj, ext_name);
+ if (!ext) {
+ pr_warn("failed to find extern definition for BTF var '%s'\n",
+ ext_name);
+ return -ESRCH;
+ }
+ btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ vs->offset = ext->kcfg.data_off;
+ }
+ }
+ return 0;
+}
+
static void free_bpf_obj(struct bpf_obj *obj)
{
int i;
@@ -6480,6 +6813,10 @@ static int load_fd(union bpf_attr *attr)
if (err < 0)
goto free;
+ err = collect_externs(obj);
+ if (err < 0)
+ goto free;
+
return obj_f;
free:
free_bpf_obj(obj);
--
2.47.1
^ permalink raw reply related [flat|nested] 30+ messages in thread* Re: [PATCH 09/14] bpf: Collect extern relocations
2025-01-09 21:43 ` [PATCH 09/14] bpf: Collect extern relocations Blaise Boscaccy
@ 2025-01-11 1:35 ` kernel test robot
0 siblings, 0 replies; 30+ messages in thread
From: kernel test robot @ 2025-01-11 1:35 UTC (permalink / raw)
To: Blaise Boscaccy, bpf
Cc: llvm, oe-kbuild-all, nkapron, teknoraver, roberto.sassu, gregkh,
paul, code, flaniel
Hi Blaise,
kernel test robot noticed the following build errors:
[auto build test ERROR on bpf/master]
[also build test ERROR on linus/master v6.13-rc6]
[cannot apply to bpf-next/master next-20250110]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Blaise-Boscaccy/bpf-Add-data-structures-for-managing-in-kernel-eBPF-relocations/20250110-064354
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git master
patch link: https://lore.kernel.org/r/20250109214617.485144-10-bboscaccy%40linux.microsoft.com
patch subject: [PATCH 09/14] bpf: Collect extern relocations
config: i386-buildonly-randconfig-006-20250111 (https://download.01.org/0day-ci/archive/20250111/202501110801.7aGt26Oh-lkp@intel.com/config)
compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250111/202501110801.7aGt26Oh-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501110801.7aGt26Oh-lkp@intel.com/
All errors (new ones prefixed by >>):
>> kernel/bpf/syscall.c:6438:22: error: incompatible pointer types passing 'Elf32_Sym *' (aka 'struct elf32_sym *') to parameter of type 'const Elf64_Sym *' (aka 'const struct elf64_sym *') [-Werror,-Wincompatible-pointer-types]
6438 | if (!sym_is_extern(&sym[i]))
| ^~~~~~~
kernel/bpf/syscall.c:6082:44: note: passing argument to parameter 'sym' here
6082 | static bool sym_is_extern(const Elf64_Sym *sym)
| ^
kernel/bpf/syscall.c:6463:20: error: call to undeclared function 'bpf_core_essential_name_len'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
6463 | ext_essent_len = bpf_core_essential_name_len(ext->name);
| ^
kernel/bpf/syscall.c:7097:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
7097 | .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
kernel/bpf/syscall.c:7147:41: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
7147 | .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
| ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
2 warnings and 2 errors generated.
vim +6438 kernel/bpf/syscall.c
6412
6413 static int collect_externs(struct bpf_obj *obj)
6414 {
6415 int i, n, off, dummy_var_btf_id;
6416 Elf_Shdr *symsec = &obj->sechdrs[obj->index.sym];
6417 Elf_Sym *sym = (void *)obj->hdr + symsec->sh_offset;
6418 const char *ext_name;
6419 const char *sec_name;
6420 struct bpf_extern_desc *ext;
6421 const struct btf_type *t;
6422 size_t ext_essent_len;
6423 struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
6424 int size;
6425 int int_btf_id;
6426 const struct btf_type *dummy_var;
6427 struct btf_type *vt;
6428 struct btf_var_secinfo *vs;
6429 const struct btf_type *func_proto;
6430 struct btf_param *param;
6431 int j;
6432
6433 dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
6434 if (dummy_var_btf_id < 0)
6435 return dummy_var_btf_id;
6436
6437 for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) {
> 6438 if (!sym_is_extern(&sym[i]))
6439 continue;
6440
6441 ext_name = obj->strtab + sym[i].st_name;
6442 ext = krealloc_array(obj->externs,
6443 obj->nr_extern + 1,
6444 sizeof(struct bpf_extern_desc),
6445 GFP_KERNEL);
6446 if (!ext)
6447 return -ENOMEM;
6448
6449 obj->externs = ext;
6450 ext = &ext[obj->nr_extern];
6451 memset(ext, 0, sizeof(*ext));
6452 obj->nr_extern++;
6453
6454 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
6455 if (ext->btf_id <= 0)
6456 return ext->btf_id;
6457
6458 t = btf_type_by_id(obj->btf, ext->btf_id);
6459 ext->name = btf_str_by_offset(obj->btf, t->name_off);
6460 ext->sym_idx = i;
6461 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
6462
6463 ext_essent_len = bpf_core_essential_name_len(ext->name);
6464 ext->essent_name = NULL;
6465 if (ext_essent_len != strlen(ext->name)) {
6466 ext->essent_name = kstrndup(ext->name, ext_essent_len, GFP_KERNEL);
6467 if (!ext->essent_name)
6468 return -ENOMEM;
6469 }
6470
6471 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
6472 if (ext->sec_btf_id <= 0) {
6473 pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
6474 ext_name, ext->btf_id, ext->sec_btf_id);
6475 return ext->sec_btf_id;
6476 }
6477
6478 sec = (void *)btf_type_by_id(obj->btf, ext->sec_btf_id);
6479 sec_name = btf_str_by_offset(obj->btf, sec->name_off);
6480
6481 if (strcmp(sec_name, ".kconfig") == 0) {
6482 if (btf_type_is_func(t)) {
6483 pr_warn("extern function %s is unsupported under .kconfig section\n",
6484 ext->name);
6485 return -EOPNOTSUPP;
6486 }
6487 kcfg_sec = sec;
6488 ext->type = EXT_KCFG;
6489
6490 if (!btf_resolve_size(obj->btf, t, &size)) {
6491 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
6492 ext_name, ext->kcfg.sz);
6493 return -EINVAL;
6494 }
6495 ext->kcfg.sz = size;
6496 ext->kcfg.align = btf_align_of(obj->btf, t->type);
6497 if (ext->kcfg.align <= 0) {
6498 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
6499 ext_name, ext->kcfg.align);
6500 return -EINVAL;
6501 }
6502 ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
6503 &ext->kcfg.is_signed);
6504 if (ext->kcfg.type == KCFG_UNKNOWN) {
6505 pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
6506 return -EOPNOTSUPP;
6507 }
6508 } else if (strcmp(sec_name, ".ksyms") == 0) {
6509 ksym_sec = sec;
6510 ext->type = EXT_KSYM;
6511 skip_mods_and_typedefs(obj->btf, t->type,
6512 &ext->ksym.type_id);
6513 } else {
6514 pr_warn("unrecognized extern section '%s'\n", sec_name);
6515 return -EOPNOTSUPP;
6516 }
6517 }
6518
6519 sort(obj->externs, obj->nr_extern, sizeof(struct bpf_extern_desc),
6520 cmp_externs, NULL);
6521
6522 if (ksym_sec) {
6523 /* find existing 4-byte integer type in BTF to use for fake
6524 * extern variables in DATASEC
6525 */
6526 int_btf_id = find_int_btf_id(obj->btf);
6527
6528 /* For extern function, a dummy_var added earlier
6529 * will be used to replace the vs->type and
6530 * its name string will be used to refill
6531 * the missing param's name.
6532 */
6533 dummy_var = btf_type_by_id(obj->btf, dummy_var_btf_id);
6534 for (i = 0; i < obj->nr_extern; i++) {
6535 ext = &obj->externs[i];
6536 if (ext->type != EXT_KSYM)
6537 continue;
6538 pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
6539 i, ext->sym_idx, ext->name);
6540 }
6541
6542 sec = ksym_sec;
6543 n = btf_vlen(sec);
6544 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
6545 vs = btf_var_secinfos(sec) + i;
6546 vt = (void *)btf_type_by_id(obj->btf, vs->type);
6547 ext_name = btf_str_by_offset(obj->btf, vt->name_off);
6548 ext = find_extern_by_name(obj, ext_name);
6549 if (!ext) {
6550 pr_warn("failed to find extern definition for BTF %s\n",
6551 ext_name);
6552 return -ESRCH;
6553 }
6554 if (btf_type_is_func(vt)) {
6555 func_proto = btf_type_by_id(obj->btf,
6556 vt->type);
6557 param = btf_params(func_proto);
6558 /* Reuse the dummy_var string if the
6559 * func proto does not have param name.
6560 */
6561 for (j = 0; j < btf_vlen(func_proto); j++)
6562 if (param[j].type && !param[j].name_off)
6563 param[j].name_off =
6564 dummy_var->name_off;
6565 vs->type = dummy_var_btf_id;
6566 vt->info &= ~0xffff;
6567 vt->info |= BTF_FUNC_GLOBAL;
6568 } else {
6569 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
6570 vt->type = int_btf_id;
6571 }
6572 vs->offset = off;
6573 vs->size = sizeof(int);
6574 }
6575 sec->size = off;
6576 }
6577
6578 if (kcfg_sec) {
6579 sec = kcfg_sec;
6580 /* for kcfg externs calculate their offsets within a .kconfig map */
6581 off = 0;
6582 for (i = 0; i < obj->nr_extern; i++) {
6583 ext = &obj->externs[i];
6584 if (ext->type != EXT_KCFG)
6585 continue;
6586
6587 ext->kcfg.data_off = roundup(off, ext->kcfg.align);
6588 off = ext->kcfg.data_off + ext->kcfg.sz;
6589 pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
6590 i, ext->sym_idx, ext->kcfg.data_off, ext->name);
6591 }
6592 sec->size = off;
6593 n = btf_vlen(sec);
6594 for (i = 0; i < n; i++) {
6595 vs = btf_var_secinfos(sec) + i;
6596 t = btf_type_by_id(obj->btf, vs->type);
6597 ext_name = btf_str_by_offset(obj->btf, t->name_off);
6598
6599 ext = find_extern_by_name(obj, ext_name);
6600 if (!ext) {
6601 pr_warn("failed to find extern definition for BTF var '%s'\n",
6602 ext_name);
6603 return -ESRCH;
6604 }
6605 btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
6606 vs->offset = ext->kcfg.data_off;
6607 }
6608 }
6609 return 0;
6610 }
6611
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 10/14] bpf: Implement BTF fixup functionality
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
` (8 preceding siblings ...)
2025-01-09 21:43 ` [PATCH 09/14] bpf: Collect extern relocations Blaise Boscaccy
@ 2025-01-09 21:43 ` Blaise Boscaccy
2025-01-11 3:19 ` kernel test robot
2025-01-09 21:43 ` [PATCH 11/14] bpf: Implement relocation collection Blaise Boscaccy
` (5 subsequent siblings)
15 siblings, 1 reply; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-09 21:43 UTC (permalink / raw)
To: bpf; +Cc: nkapron, teknoraver, roberto.sassu, gregkh, paul, code, flaniel
This code heavily borrows from bpf_object_fixup_btf. There are certain
things that clang doesn't quite handle properly for our needs, mostly
related to zeroed sizes and offsets.
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
kernel/bpf/syscall.c | 189 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 189 insertions(+)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 51b14cb9c4ca1..f47e95c1ab975 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6578,6 +6578,191 @@ static int collect_externs(struct bpf_obj *obj)
return 0;
}
+/* sort() comparator: order DATASEC var_secinfos by ascending offset.
+ * Use explicit comparisons instead of "a->offset - b->offset": the
+ * offsets are __u32, and the unsigned difference converted to int can
+ * yield the wrong sign for offsets >= 2 GiB.
+ */
+static int compare_vsi_off(const void *_a, const void *_b)
+{
+ const struct btf_var_secinfo *a = _a;
+ const struct btf_var_secinfo *b = _b;
+
+ if (a->offset != b->offset)
+ return a->offset < b->offset ? -1 : 1;
+ return 0;
+}
+
+/* Find the section header whose name (looked up in the section-header
+ * string table) matches @name. Scanning starts at index 1 because
+ * index 0 is the reserved null section. Returns NULL if not found.
+ */
+static Elf_Shdr *elf_sec_by_name(const struct bpf_obj *obj, const char *name)
+{
+ unsigned int i;
+ Elf_Shdr *shdr;
+
+ for (i = 1; i < obj->hdr->e_shnum; i++) {
+ shdr = &obj->sechdrs[i];
+ if (strcmp(name, obj->secstrings + shdr->sh_name) == 0)
+ return shdr;
+ }
+ return NULL;
+}
+
+/* Store the size of ELF section @name in *size. Returns 0 on
+ * success, -EINVAL when @name is NULL, -ENOENT when no section with
+ * that name exists.
+ */
+static int find_elf_sec_sz(const struct bpf_obj *obj, const char *name, u32 *size)
+{
+ Elf_Shdr *scn;
+
+ if (!name)
+ return -EINVAL;
+
+ scn = elf_sec_by_name(obj, name);
+ if (scn) {
+ *size = scn->sh_size;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+/* Find the global or weak STT_OBJECT symbol named @name in the symbol
+ * table. Returns a pointer into the mapped ELF image, or
+ * ERR_PTR(-ENOENT) when no such symbol exists.
+ *
+ * Fix vs v1: use Elf64_Sym for the table walk — the generic Elf_Sym
+ * is Elf32_Sym on 32-bit builds, which made "return &sym[i]"
+ * incompatible with the declared Elf64_Sym * return type (reported by
+ * the kernel test robot).
+ */
+static Elf64_Sym *find_elf_var_sym(const struct bpf_obj *obj, const char *name)
+{
+ unsigned int i;
+ Elf_Shdr *symsec = &obj->sechdrs[obj->index.sym];
+ Elf64_Sym *sym = (void *)obj->hdr + symsec->sh_offset;
+
+ for (i = 1; i < symsec->sh_size / sizeof(Elf64_Sym); i++) {
+ if (ELF64_ST_TYPE(sym[i].st_info) != STT_OBJECT)
+ continue;
+
+ if (ELF64_ST_BIND(sym[i].st_info) != STB_GLOBAL &&
+ ELF64_ST_BIND(sym[i].st_info) != STB_WEAK)
+ continue;
+
+ if (strcmp(name, obj->strtab + sym[i].st_name) == 0)
+ return &sym[i];
+ }
+ return ERR_PTR(-ENOENT);
+}
+
+#define ELF64_ST_VISIBILITY(o) ((o) & 0x03)
+
+/* Symbol visibility specification encoded in the st_other field. */
+#define STV_DEFAULT 0 /* Default symbol visibility rules */
+#define STV_INTERNAL 1 /* Processor specific hidden class */
+#define STV_HIDDEN 2 /* Sym unavailable in other modules */
+#define STV_PROTECTED 3 /* Not preemptible, not exported */
+
+/* Fix up a single DATASEC BTF type using information from the ELF:
+ * fill in a zero DATASEC size from the ELF section size, set each
+ * VAR's offset from its ELF symbol value when sizes were missing,
+ * demote hidden/internal global VARs to static linkage, and finally
+ * sort the var_secinfos by offset. Extern-backing sections
+ * (.kconfig/.ksyms) only get the sort. Ported from libbpf's
+ * btf_fixup_datasec().
+ */
+static int btf_fixup_datasec(struct bpf_obj *obj, struct btf *btf,
+ struct btf_type *t)
+{
+ __u32 size = 0, i, vars = btf_vlen(t);
+ const char *sec_name = btf_str_by_offset(btf, t->name_off);
+ struct btf_var_secinfo *vsi;
+ bool fixup_offsets = false;
+ int err;
+
+ if (!sec_name) {
+ pr_debug("No name found in string section for DATASEC kind.\n");
+ return -ENOENT;
+ }
+
+ /* Extern-backing datasecs (.ksyms, .kconfig) have their size and
+ * variable offsets set at the previous step. Further, not every
+ * extern BTF VAR has corresponding ELF symbol preserved, so we skip
+ * all fixups altogether for such sections and go straight to sorting
+ * VARs within their DATASEC.
+ */
+ if (strcmp(sec_name, ".kconfig") == 0 || strcmp(sec_name, ".ksyms") == 0)
+ goto sort_vars;
+
+ /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
+ * fix this up. But BPF static linker already fixes this up and fills
+ * all the sizes and offsets during static linking. So this step has
+ * to be optional. But the STV_HIDDEN handling is non-optional for any
+ * non-extern DATASEC, so the variable fixup loop below handles both
+ * functions at the same time, paying the cost of BTF VAR <-> ELF
+ * symbol matching just once.
+ */
+ if (t->size == 0) {
+ err = find_elf_sec_sz(obj, sec_name, &size);
+ if (err || !size) {
+ pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n",
+ sec_name, size, err);
+ return -ENOENT;
+ }
+
+ t->size = size;
+ fixup_offsets = true;
+ }
+
+ for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
+ const struct btf_type *t_var;
+ struct btf_var *var;
+ const char *var_name;
+ Elf64_Sym *sym;
+
+ /* every DATASEC member must be a BTF VAR */
+ t_var = btf_type_by_id(btf, vsi->type);
+ if (!t_var || !(btf_kind(t_var) == BTF_KIND_VAR)) {
+ pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
+ return -EINVAL;
+ }
+
+ /* statics and externs have no backing ELF symbol to match */
+ var = btf_var(t_var);
+ if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
+ continue;
+
+ var_name = btf_str_by_offset(btf, t_var->name_off);
+ if (!var_name) {
+ pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
+ sec_name, i);
+ return -ENOENT;
+ }
+
+ sym = find_elf_var_sym(obj, var_name);
+ if (IS_ERR(sym)) {
+ pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
+ sec_name, var_name);
+ return -ENOENT;
+ }
+
+ /* st_value is the VAR's offset within its section */
+ if (fixup_offsets)
+ vsi->offset = sym->st_value;
+
+ /* if variable is a global/weak symbol, but has restricted
+ * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
+ * as static. This follows similar logic for functions (BPF
+ * subprogs) and influences libbpf's further decisions about
+ * whether to make global data BPF array maps as
+ * BPF_F_MMAPABLE.
+ */
+ if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
+ || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
+ var->linkage = BTF_VAR_STATIC;
+ }
+
+sort_vars:
+ sort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off, NULL);
+ return 0;
+}
+
+/* Walk every BTF type of the object and apply btf_fixup_datasec() to
+ * each DATASEC. No-op when the object carries no BTF. Returns 0 or
+ * the first error from btf_fixup_datasec().
+ */
+static int fixup_btf(struct bpf_obj *obj)
+{
+ int i, n, err = 0;
+
+ if (!obj->btf)
+ return 0;
+
+ n = btf_type_cnt(obj->btf);
+ for (i = 1; i < n; i++) {
+ struct btf_type *t = (struct btf_type *)btf_type_by_id(obj->btf, i);
+
+ /* Loader needs to fix up some of the things compiler
+ * couldn't get its hands on while emitting BTF. This
+ * is section size and global variable offset. We use
+ * the info from the ELF itself for this purpose.
+ */
+ if (btf_kind(t) == BTF_KIND_DATASEC) {
+ err = btf_fixup_datasec(obj, obj->btf, t);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static void free_bpf_obj(struct bpf_obj *obj)
{
int i;
@@ -6817,6 +7002,10 @@ static int load_fd(union bpf_attr *attr)
if (err < 0)
goto free;
+ err = fixup_btf(obj);
+ if (err < 0)
+ goto free;
+
return obj_f;
free:
free_bpf_obj(obj);
--
2.47.1
^ permalink raw reply related [flat|nested] 30+ messages in thread* Re: [PATCH 10/14] bpf: Implement BTF fixup functionality
2025-01-09 21:43 ` [PATCH 10/14] bpf: Implement BTF fixup functionality Blaise Boscaccy
@ 2025-01-11 3:19 ` kernel test robot
0 siblings, 0 replies; 30+ messages in thread
From: kernel test robot @ 2025-01-11 3:19 UTC (permalink / raw)
To: Blaise Boscaccy, bpf
Cc: llvm, oe-kbuild-all, nkapron, teknoraver, roberto.sassu, gregkh,
paul, code, flaniel
Hi Blaise,
kernel test robot noticed the following build errors:
[auto build test ERROR on bpf/master]
[also build test ERROR on linus/master v6.13-rc6]
[cannot apply to bpf-next/master next-20250110]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Blaise-Boscaccy/bpf-Add-data-structures-for-managing-in-kernel-eBPF-relocations/20250110-064354
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git master
patch link: https://lore.kernel.org/r/20250109214617.485144-11-bboscaccy%40linux.microsoft.com
patch subject: [PATCH 10/14] bpf: Implement BTF fixup functionality
config: i386-buildonly-randconfig-006-20250111 (https://download.01.org/0day-ci/archive/20250111/202501111043.1XoiVhsx-lkp@intel.com/config)
compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250111/202501111043.1XoiVhsx-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501111043.1XoiVhsx-lkp@intel.com/
All errors (new ones prefixed by >>):
kernel/bpf/syscall.c:6438:22: error: incompatible pointer types passing 'Elf32_Sym *' (aka 'struct elf32_sym *') to parameter of type 'const Elf64_Sym *' (aka 'const struct elf64_sym *') [-Werror,-Wincompatible-pointer-types]
6438 | if (!sym_is_extern(&sym[i]))
| ^~~~~~~
kernel/bpf/syscall.c:6082:44: note: passing argument to parameter 'sym' here
6082 | static bool sym_is_extern(const Elf64_Sym *sym)
| ^
kernel/bpf/syscall.c:6463:20: error: call to undeclared function 'bpf_core_essential_name_len'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
6463 | ext_essent_len = bpf_core_essential_name_len(ext->name);
| ^
>> kernel/bpf/syscall.c:6664:11: error: incompatible pointer types returning 'Elf32_Sym *' (aka 'struct elf32_sym *') from a function with result type 'Elf64_Sym *' (aka 'struct elf64_sym *') [-Werror,-Wincompatible-pointer-types]
6664 | return &sym[i];
| ^~~~~~~
kernel/bpf/syscall.c:7286:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
7286 | .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
| ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
kernel/bpf/syscall.c:7336:41: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
7336 | .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
| ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
2 warnings and 3 errors generated.
vim +6664 kernel/bpf/syscall.c
6648
6649 static Elf64_Sym *find_elf_var_sym(const struct bpf_obj *obj, const char *name)
6650 {
6651 unsigned int i;
6652 Elf_Shdr *symsec = &obj->sechdrs[obj->index.sym];
6653 Elf_Sym *sym = (void *)obj->hdr + symsec->sh_offset;
6654
6655 for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) {
6656 if (ELF64_ST_TYPE(sym[i].st_info) != STT_OBJECT)
6657 continue;
6658
6659 if (ELF64_ST_BIND(sym[i].st_info) != STB_GLOBAL &&
6660 ELF64_ST_BIND(sym[i].st_info) != STB_WEAK)
6661 continue;
6662
6663 if (strcmp(name, obj->strtab + sym[i].st_name) == 0)
> 6664 return &sym[i];
6665
6666 }
6667 return ERR_PTR(-ENOENT);
6668 }
6669
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 11/14] bpf: Implement relocation collection
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
` (9 preceding siblings ...)
2025-01-09 21:43 ` [PATCH 10/14] bpf: Implement BTF fixup functionality Blaise Boscaccy
@ 2025-01-09 21:43 ` Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 12/14] bpf: Resolve external relocations Blaise Boscaccy
` (4 subsequent siblings)
15 siblings, 0 replies; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-09 21:43 UTC (permalink / raw)
To: bpf; +Cc: nkapron, teknoraver, roberto.sassu, gregkh, paul, code, flaniel
This code heavily borrows from bpf_program__record_reloc in
libbpf. This symbol-table parsing pass is primarily responsible for
identifying subprogram and call instructions that need to be
relocated. Map relocations are also discovered during this pass.
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
kernel/bpf/syscall.c | 308 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 308 insertions(+)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f47e95c1ab975..9c3d037cd6b95 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6763,6 +6763,310 @@ static int fixup_btf(struct bpf_obj *obj)
return 0;
}
+static bool insn_is_subprog_call(const struct bpf_insn *insn)
+{
+ return BPF_CLASS(insn->code) == BPF_JMP &&
+ BPF_OP(insn->code) == BPF_CALL &&
+ BPF_SRC(insn->code) == BPF_K &&
+ insn->src_reg == BPF_PSEUDO_CALL &&
+ insn->dst_reg == 0 &&
+ insn->off == 0;
+}
+
+static bool is_call_insn(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_JMP | BPF_CALL);
+}
+
+static inline bool is_ldimm64_insn(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
+static bool insn_is_pseudo_func(struct bpf_insn *insn)
+{
+ return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
+}
+
+static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
+{
+ int bind = ELF64_ST_BIND(sym->st_info);
+ int type = ELF64_ST_TYPE(sym->st_info);
+
+ /* in .text section */
+ if (sym->st_shndx != text_shndx)
+ return false;
+
+ /* local function */
+ if (bind == STB_LOCAL && type == STT_SECTION)
+ return true;
+
+ /* global function */
+ return bind == STB_GLOBAL && type == STT_FUNC;
+}
+
+static bool prog_contains_insn(const struct bpf_prog_obj *prog, size_t insn_idx)
+{
+ return insn_idx >= prog->sec_insn_off &&
+ insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
+}
+
+static struct bpf_prog_obj *find_prog_by_sec_insn(const struct bpf_obj *obj,
+ size_t sec_idx, size_t insn_idx)
+{
+ int l = 0, r = obj->nr_programs - 1, m;
+ struct bpf_prog_obj *prog;
+
+ if (!obj->nr_programs)
+ return NULL;
+
+ while (l < r) {
+ m = l + (r - l + 1) / 2;
+ prog = &obj->progs[m];
+
+ if (prog->sec_idx < sec_idx ||
+ (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
+ l = m;
+ else
+ r = m - 1;
+ }
+ /* matching program could be at index l, but it still might be the
+ * wrong one, so we need to double check conditions for the last time
+ */
+ prog = &obj->progs[l];
+ if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
+ return prog;
+ return NULL;
+}
+
+static enum libbpf_map_type section_to_libbpf_map_type(struct bpf_obj *obj, int sec_idx)
+{
+ Elf_Shdr *shdr = &obj->sechdrs[sec_idx];
+
+ if (strcmp(".data", obj->secstrings + shdr->sh_name) == 0)
+ return LIBBPF_MAP_DATA;
+
+ if (str_has_prefix(obj->secstrings + shdr->sh_name, ".rodata"))
+ return LIBBPF_MAP_RODATA;
+
+ if (str_has_prefix(obj->secstrings + shdr->sh_name, ".bss"))
+ return LIBBPF_MAP_BSS;
+
+ return LIBBPF_MAP_UNSPEC;
+}
+
+static int program_record_reloc(struct bpf_obj *obj,
+ struct bpf_prog_obj *prog,
+ struct bpf_reloc_desc *reloc_desc,
+ u32 insn_idx, const char *sym_name,
+ const Elf64_Sym *sym, const Elf64_Rel *rel)
+{
+ struct bpf_insn *insn = &prog->insn[insn_idx];
+ size_t map_idx, nr_maps = obj->nr_maps;
+ u32 shdr_idx = sym->st_shndx;
+ enum libbpf_map_type type;
+ struct bpf_map_obj *map;
+
+ if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
+ pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
+ prog->name, sym_name, insn_idx, insn->code);
+ return -EOPNOTSUPP;
+ }
+
+ if (sym_is_extern(sym)) {
+ int sym_idx = ELF64_R_SYM(rel->r_info);
+ int i, n = obj->nr_extern;
+ struct bpf_extern_desc *ext;
+
+ for (i = 0; i < n; i++) {
+ ext = &obj->externs[i];
+ if (ext->sym_idx == sym_idx)
+ break;
+ }
+ if (i >= n) {
+ pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
+ prog->name, sym_name, sym_idx);
+ return -EOPNOTSUPP;
+ }
+ pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
+ prog->name, i, ext->name, ext->sym_idx, insn_idx);
+ if (insn->code == (BPF_JMP | BPF_CALL))
+ reloc_desc->type = RELO_EXTERN_CALL;
+ else
+ reloc_desc->type = RELO_EXTERN_LD64;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->ext_idx = i;
+ return 0;
+ }
+
+ /* sub-program call relocation */
+ if (is_call_insn(insn)) {
+ if (insn->src_reg != BPF_PSEUDO_CALL) {
+ pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
+ return -EOPNOTSUPP;
+ }
+ /* text_shndx can be 0, if no default "main" program exists */
+ if (!shdr_idx || shdr_idx != obj->index.text)
+ return -EOPNOTSUPP;
+
+ if (sym->st_value % sizeof(struct bpf_insn)) {
+ pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
+ prog->name, sym_name, (size_t)sym->st_value);
+ return -EOPNOTSUPP;
+ }
+ reloc_desc->type = RELO_CALL;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->sym_off = sym->st_value;
+ return 0;
+ }
+
+ if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
+ pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
+ prog->name, sym_name, shdr_idx);
+ return -EOPNOTSUPP;
+ }
+
+ /* loading subprog addresses */
+ if (sym_is_subprog(sym, obj->index.text)) {
+ /* global_func: sym->st_value = offset in the section, insn->imm = 0.
+ * local_func: sym->st_value = 0, insn->imm = offset in the section.
+ */
+ if ((sym->st_value % sizeof(struct bpf_insn)) ||
+ (insn->imm % sizeof(struct bpf_insn))) {
+ pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
+ prog->name, sym_name, (size_t)sym->st_value, insn->imm);
+ return -EOPNOTSUPP;
+ }
+ reloc_desc->type = RELO_SUBPROG_ADDR;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->sym_off = sym->st_value;
+ return 0;
+ }
+
+
+ type = section_to_libbpf_map_type(obj, shdr_idx);
+
+ if (shdr_idx == obj->index.arena) {
+ reloc_desc->type = RELO_DATA;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->map_idx = obj->arena_map_idx;
+ reloc_desc->sym_off = sym->st_value;
+ return 0;
+ }
+
+ /* generic map reference relocation */
+ if (type == LIBBPF_MAP_UNSPEC) {
+ for (map_idx = 0; map_idx < nr_maps; map_idx++) {
+ map = &obj->maps[map_idx];
+ if (map->map_type != type ||
+ map->sec_idx != sym->st_shndx ||
+ map->sec_offset != sym->st_value)
+ continue;
+ pr_debug("prog '%s': found map %zd (sec %d, off %d) for insn #%u\n",
+ prog->name, map_idx, map->sec_idx,
+ map->sec_offset, insn_idx);
+ break;
+ }
+ if (map_idx >= nr_maps) {
+ pr_warn("prog '%s': map relo failed to find map for section off %lu\n",
+ prog->name, (size_t)sym->st_value);
+ return -EOPNOTSUPP;
+ }
+ reloc_desc->type = RELO_LD64;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->map_idx = map_idx;
+ reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
+ return 0;
+ }
+
+ for (map_idx = 0; map_idx < nr_maps; map_idx++) {
+ map = &obj->maps[map_idx];
+ if (map->map_type != type || map->sec_idx != sym->st_shndx)
+ continue;
+ pr_debug("prog '%s': found data map %zd (sec %d, off %u) for insn %u\n",
+ prog->name, map_idx, map->sec_idx,
+ map->sec_offset, insn_idx);
+ break;
+ }
+ if (map_idx >= nr_maps) {
+ pr_warn("prog '%s': data relo failed to find map for section (%lu:%lu)\n",
+ prog->name, map_idx, nr_maps);
+ return -EOPNOTSUPP;
+ }
+
+ reloc_desc->type = RELO_DATA;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->map_idx = map_idx;
+ reloc_desc->sym_off = sym->st_value;
+ return 0;
+}
+
+static int collect_prog_relocs(struct bpf_obj *obj, Elf64_Shdr *shdr, unsigned int shdr_idx)
+{
+ unsigned int i, nrels, sym_idx, insn_idx;
+ size_t sec_idx = shdr->sh_info;
+ int err;
+ struct bpf_prog_obj *prog;
+ Elf64_Rel *rel = (void *)obj->hdr + shdr->sh_offset;
+
+ Elf_Shdr *symsec = &obj->sechdrs[obj->index.sym];
+ Elf_Sym *sym = (void *)obj->hdr + symsec->sh_offset;
+ const char *sym_name;
+
+ nrels = shdr->sh_size / shdr->sh_entsize;
+
+ for (i = 0; i < nrels; i++) {
+ sym_idx = ELF64_R_SYM(rel[i].r_info);
+ insn_idx = rel[i].r_offset / sizeof(struct bpf_insn);
+
+ sym_name = obj->strtab + sym[sym_idx].st_name;
+ prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
+ if (!prog)
+ continue;
+
+ prog->reloc_desc = krealloc_array(prog->reloc_desc,
+ prog->nr_reloc + 1,
+ sizeof(struct bpf_reloc_desc),
+ GFP_KERNEL);
+ if (!prog->reloc_desc)
+ return -ENOMEM;
+
+ err = program_record_reloc(obj,
+ prog,
+ &prog->reloc_desc[prog->nr_reloc],
+ insn_idx,
+ sym_name,
+ &sym[sym_idx],
+ &rel[i]);
+
+ if (err)
+ return err;
+
+ prog->nr_reloc++;
+
+ }
+ return 0;
+}
+
+static int collect_relos(struct bpf_obj *obj)
+{
+ unsigned int i;
+ Elf_Shdr *shdr;
+ int err;
+
+ for (i = 1; i < obj->hdr->e_shnum; i++) {
+ shdr = &obj->sechdrs[i];
+ if (shdr->sh_type != SHT_REL)
+ continue;
+ if (i != obj->index.btf && i != obj->index.btf_ext) {
+ err = collect_prog_relocs(obj, shdr, i);
+ if (err)
+ return err;
+ }
+ }
+ return 0;
+}
+
static void free_bpf_obj(struct bpf_obj *obj)
{
int i;
@@ -7006,6 +7310,10 @@ static int load_fd(union bpf_attr *attr)
if (err < 0)
goto free;
+ err = collect_relos(obj);
+ if (err < 0)
+ goto free;
+
return obj_f;
free:
free_bpf_obj(obj);
--
2.47.1
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 12/14] bpf: Resolve external relocations
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
` (10 preceding siblings ...)
2025-01-09 21:43 ` [PATCH 11/14] bpf: Implement relocation collection Blaise Boscaccy
@ 2025-01-09 21:43 ` Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 13/14] bpf: Apply in-kernel bpf instruction relocations Blaise Boscaccy
` (3 subsequent siblings)
15 siblings, 0 replies; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-09 21:43 UTC (permalink / raw)
To: bpf; +Cc: nkapron, teknoraver, roberto.sassu, gregkh, paul, code, flaniel
Here we attempt to assign addresses to relocations that target
external symbols. This code heavily borrows from
bpf_object__resolve_externs with a few key differences.
Since we are already in the kernel, for kallsyms based relocations, we
simply look them up. For btf based relocations, we consult the
kernel's btf information. There is a key difference in the handling of
kconfig-based relocations though. Here, we rely upon the userspace
kconfig map data and simply use the values that are passed in.
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
kernel/bpf/syscall.c | 174 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 174 insertions(+)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 9c3d037cd6b95..b766c790ae3f4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -7067,6 +7067,176 @@ static int collect_relos(struct bpf_obj *obj)
return 0;
}
+static int find_ksym_btf_id(struct bpf_obj *obj, const char *ksym_name, u16 kind,
+ struct btf **res_btf,
+ struct bpf_module_btf **res_mod_btf)
+{
+ struct bpf_module_btf *mod_btf;
+ struct btf *btf;
+ int i, id;
+
+ btf = obj->btf_vmlinux;
+ mod_btf = NULL;
+ id = btf_find_by_name_kind(btf, ksym_name, kind);
+ if (id == -ENOENT) {
+ for (i = 0; i < obj->btf_modules_cnt; i++) {
+ /* we assume module_btf's BTF FD is always >0 */
+ mod_btf = &obj->btf_modules[i];
+ btf = mod_btf->btf;
+ id = btf_find_by_name_kind(btf, ksym_name, kind);
+ if (id != -ENOENT)
+ break;
+ }
+ }
+ if (id <= 0)
+ return -ESRCH;
+
+ *res_btf = btf;
+ *res_mod_btf = mod_btf;
+ return id;
+}
+
+static int resolve_ksym_var_btf_id(struct bpf_obj *obj, struct bpf_extern_desc *ext)
+{
+ const struct btf_type *targ_var, *targ_type;
+ u32 targ_type_id, local_type_id;
+ struct bpf_module_btf *mod_btf = NULL;
+ const char *targ_var_name;
+ struct btf *btf = NULL;
+ int id, err;
+
+ id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
+ if (id < 0) {
+ if (id == -ESRCH && ext->is_weak)
+ return 0;
+ pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
+ ext->name);
+ return id;
+ }
+
+ /* find local type_id */
+ local_type_id = ext->ksym.type_id;
+
+ /* find target type_id */
+ targ_var = btf_type_by_id(btf, id);
+ targ_var_name = btf_str_by_offset(btf, targ_var->name_off);
+ targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
+
+ err = bpf_core_types_are_compat(obj->btf, local_type_id,
+ btf, targ_type_id);
+ if (err <= 0) {
+ pr_warn("extern (var ksym) '%s': incompatible types\n", ext->name);
+ return -EINVAL;
+ }
+
+ ext->is_set = true;
+ ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
+ ext->ksym.kernel_btf_id = id;
+
+ return 0;
+}
+
+static int resolve_ksym_func_btf_id(struct bpf_obj *obj, struct bpf_extern_desc *ext)
+{
+ int local_func_proto_id, kfunc_proto_id, kfunc_id;
+ struct bpf_module_btf *mod_btf = NULL;
+ const struct btf_type *kern_func;
+ struct btf *kern_btf = NULL;
+ int ret;
+
+ local_func_proto_id = ext->ksym.type_id;
+
+ kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf,
+ &mod_btf);
+ if (kfunc_id < 0) {
+ if (kfunc_id == -ESRCH && ext->is_weak)
+ return 0;
+ pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
+ ext->name);
+ return kfunc_id;
+ }
+
+ kern_func = btf_type_by_id(kern_btf, kfunc_id);
+ kfunc_proto_id = kern_func->type;
+
+ ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
+ kern_btf, kfunc_proto_id);
+ if (ret <= 0) {
+ if (ext->is_weak)
+ return 0;
+
+ pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with [%d]\n",
+ ext->name, local_func_proto_id,
+ kfunc_proto_id);
+ return -EINVAL;
+ }
+
+ ext->is_set = true;
+ ext->ksym.kernel_btf_id = kfunc_id;
+ ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
+
+ /* Also set kernel_btf_obj_fd to make sure that relocate_data()
+ * populates FD into ld_imm64 insn when it's used to point to kfunc.
+ * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call.
+ * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64.
+ */
+ ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
+
+ return 0;
+}
+
+static int resolve_externs(struct bpf_obj *obj)
+{
+ struct bpf_extern_desc *ext;
+ int err, i;
+ const struct btf_type *t;
+ unsigned long addr;
+
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+
+ if (ext->type == EXT_KSYM) {
+ if (ext->ksym.type_id) {
+ t = btf_type_by_id(obj->btf, ext->btf_id);
+ if (btf_kind(t) == BTF_KIND_VAR)
+ err = resolve_ksym_var_btf_id(obj, ext);
+ else
+ err = resolve_ksym_func_btf_id(obj, ext);
+ if (err)
+ return err;
+ } else {
+ addr = kallsyms_lookup_name(ext->name);
+ if (addr > 0) {
+ ext->is_set = true;
+ ext->ksym.addr = addr;
+ }
+ }
+ } else if (ext->type == EXT_KCFG) {
+ pr_debug("extern (kcfg) '%s': loading from offset %d\n",
+ ext->name, ext->kcfg.data_off);
+ ext->is_set = true;
+ } else {
+ pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
+ return -EINVAL;
+ }
+ }
+
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+
+ if (!ext->is_set && !ext->is_weak) {
+ pr_warn("extern '%s' (strong): not resolved\n", ext->name);
+ return -ESRCH;
+ } else if (!ext->is_set) {
+ pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
+ ext->name);
+ }
+ }
+
+
+ return 0;
+}
+
static void free_bpf_obj(struct bpf_obj *obj)
{
int i;
@@ -7314,6 +7484,10 @@ static int load_fd(union bpf_attr *attr)
if (err < 0)
goto free;
+ err = resolve_externs(obj);
+ if (err < 0)
+ goto free;
+
return obj_f;
free:
free_bpf_obj(obj);
--
2.47.1
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 13/14] bpf: Apply in-kernel bpf instruction relocations
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
` (11 preceding siblings ...)
2025-01-09 21:43 ` [PATCH 12/14] bpf: Resolve external relocations Blaise Boscaccy
@ 2025-01-09 21:43 ` Blaise Boscaccy
2025-01-09 21:43 ` [PATCH 14/14] bpf: Augment BPF_PROG_LOAD to use in-kernel relocations Blaise Boscaccy
` (2 subsequent siblings)
15 siblings, 0 replies; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-09 21:43 UTC (permalink / raw)
To: bpf; +Cc: nkapron, teknoraver, roberto.sassu, gregkh, paul, code, flaniel
This code heavily borrows from libbpf, in particular,
bpf_object__relocate. CO-RE relocation facilities already exist in
the kernel.
All the previously collected relocations are applied and the
immediates of the instructions are rewritten accordingly. Any values
corresponding to map offsets depend upon the user-supplied map array.
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
kernel/bpf/syscall.c | 489 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 489 insertions(+)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b766c790ae3f4..ea0401634e752 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -41,6 +41,8 @@
#include <net/netkit.h>
#include <net/tcx.h>
+#include "../tools/lib/bpf/relo_core.h"
+
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
@@ -7237,6 +7239,489 @@ static int resolve_externs(struct bpf_obj *obj)
return 0;
}
+static int relocate_core(struct bpf_obj *obj)
+{
+ const struct btf_ext_info_sec *sec;
+ const struct bpf_core_relo *rec;
+ const struct btf_ext_info *seg;
+ int i, insn_idx, sec_idx, sec_num;
+ struct bpf_prog_obj *prog;
+ struct bpf_insn *insn;
+ const char *sec_name;
+
+ struct bpf_core_ctx ctx = {
+ .log = NULL,
+ .btf = obj->btf,
+ };
+
+ seg = &obj->btf_ext->core_relo_info;
+ sec_num = 0;
+
+ for_each_btf_ext_sec(seg, sec) {
+ sec_idx = seg->sec_idxs[sec_num];
+ sec_num++;
+ sec_name = btf_str_by_offset(obj->btf, sec->sec_name_off);
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ insn_idx = rec->insn_off / sizeof(struct bpf_insn);
+ prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
+
+ if (!prog)
+ continue;
+
+ insn_idx = insn_idx - prog->sec_insn_off;
+ if (insn_idx >= prog->insn_cnt)
+ return -EINVAL;
+ insn = &prog->insn[insn_idx];
+
+ bpf_core_apply(&ctx, rec, i, insn);
+ }
+ }
+
+ return 0;
+}
+
+static int append_subprog_relos(struct bpf_prog_obj *main_prog, struct bpf_prog_obj *subprog)
+{
+ int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
+ struct bpf_reloc_desc *relos;
+ int i;
+
+ if (main_prog == subprog)
+ return 0;
+ relos = krealloc_array(main_prog->reloc_desc, new_cnt, sizeof(*relos), GFP_KERNEL);
+ /* if new count is zero, reallocarray can return a valid NULL result;
+ * in this case the previous pointer will be freed, so we *have to*
+ * reassign old pointer to the new value (even if it's NULL)
+ */
+ if (!relos && new_cnt)
+ return -ENOMEM;
+ if (subprog->nr_reloc)
+ memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
+ sizeof(*relos) * subprog->nr_reloc);
+
+ for (i = main_prog->nr_reloc; i < new_cnt; i++)
+ relos[i].insn_idx += subprog->sub_insn_off;
+ /* After insn_idx adjustment the 'relos' array is still sorted
+ * by insn_idx and doesn't break bsearch.
+ */
+ main_prog->reloc_desc = relos;
+ main_prog->nr_reloc = new_cnt;
+ return 0;
+}
+
+static int cmp_relo_by_insn_idx(const void *key, const void *elem)
+{
+ size_t insn_idx = *(const size_t *)key;
+ const struct bpf_reloc_desc *relo = elem;
+
+ if (insn_idx == relo->insn_idx)
+ return 0;
+ return insn_idx < relo->insn_idx ? -1 : 1;
+}
+
+static struct bpf_reloc_desc *find_prog_insn_relo(const struct bpf_prog_obj *prog, size_t insn_idx)
+{
+ if (!prog->nr_reloc)
+ return NULL;
+ return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
+ sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
+}
+
+static int append_subprog_code(struct bpf_obj *obj, struct bpf_prog_obj *main_prog,
+ struct bpf_prog_obj *subprog)
+{
+ struct bpf_insn *insns;
+ size_t new_cnt;
+ int err;
+
+ subprog->sub_insn_off = main_prog->insn_cnt;
+
+ new_cnt = main_prog->insn_cnt + subprog->insn_cnt;
+ insns = krealloc_array(main_prog->insn, new_cnt, sizeof(*insns), GFP_KERNEL);
+ if (!insns) {
+ pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
+ return -ENOMEM;
+ }
+
+ main_prog->insn = insns;
+ main_prog->insn_cnt = new_cnt;
+
+ memcpy(main_prog->insn + subprog->sub_insn_off, subprog->insn,
+ subprog->insn_cnt * sizeof(*insns));
+
+ /* The subprog insns are now appended. Append its relos too. */
+ err = append_subprog_relos(main_prog, subprog);
+ if (err)
+ return err;
+ return 0;
+}
+
+static int reloc_code(struct bpf_obj *obj, struct bpf_prog_obj *main_prog,
+ struct bpf_prog_obj *prog)
+{
+
+ size_t sub_idx, insn_idx;
+ struct bpf_prog_obj *subprog;
+ struct bpf_reloc_desc *relo;
+ struct bpf_insn *insn;
+ int err;
+
+ for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
+ insn = &main_prog->insn[prog->sub_insn_off + insn_idx];
+ if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
+ continue;
+
+ relo = find_prog_insn_relo(prog, insn_idx);
+ if (relo && relo->type == RELO_EXTERN_CALL)
+ /* kfunc relocations will be handled later
+ * in relocate_data()
+ */
+ continue;
+ if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
+ pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
+ prog->name, insn_idx, relo->type);
+ return -EOPNOTSUPP;
+ }
+ if (relo) {
+ /* sub-program instruction index is a combination of
+ * an offset of a symbol pointed to by relocation and
+ * call instruction's imm field; for global functions,
+ * call always has imm = -1, but for static functions
+ * relocation is against STT_SECTION and insn->imm
+ * points to a start of a static function
+ *
+ * for subprog addr relocation, the relo->sym_off + insn->imm is
+ * the byte offset in the corresponding section.
+ */
+ if (relo->type == RELO_CALL)
+ sub_idx = relo->sym_off / sizeof(struct bpf_insn) + insn->imm + 1;
+ else
+ sub_idx = (relo->sym_off + insn->imm) / sizeof(struct bpf_insn);
+ } else if (insn_is_pseudo_func(insn)) {
+ /*
+ * RELO_SUBPROG_ADDR relo is always emitted even if both
+ * functions are in the same section, so it shouldn't reach here.
+ */
+ pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
+ prog->name, insn_idx);
+ return -EOPNOTSUPP;
+ } else {
+ /* if subprogram call is to a static function within
+ * the same ELF section, there won't be any relocation
+ * emitted, but it also means there is no additional
+ * offset necessary, insns->imm is relative to
+ * instruction's original position within the section
+ */
+ sub_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
+ }
+
+ /* we enforce that sub-programs should be in .text section */
+ subprog = find_prog_by_sec_insn(obj, obj->index.text, sub_idx);
+ if (!subprog) {
+ pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
+ prog->name);
+ return -EOPNOTSUPP;
+ }
+
+ /* if it's the first call instruction calling into this
+ * subprogram (meaning this subprog hasn't been processed
+ * yet) within the context of current main program:
+ * - append it at the end of main program's instructions blob;
+ * - process it recursively, while current program is put on hold;
+ * - if that subprogram calls some other not yet processed
+ * subprogram, same thing will happen recursively until
+ * there are no more unprocessed subprograms left to append
+ * and relocate.
+ */
+ if (subprog->sub_insn_off == 0) {
+ err = append_subprog_code(obj, main_prog, subprog);
+ if (err)
+ return err;
+ err = reloc_code(obj, main_prog, subprog);
+ if (err)
+ return err;
+ }
+
+ /* main_prog->insns memory could have been re-allocated, so
+ * calculate pointer again
+ */
+ insn = &main_prog->insn[prog->sub_insn_off + insn_idx];
+ /* calculate correct instruction position within current main
+ * prog; each main prog can have a different set of
+ * subprograms appended (potentially in different order as
+ * well), so position of any subprog can be different for
+ * different main programs
+ */
+ insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
+
+ pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
+ prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
+ }
+
+ return 0;
+}
+
+static bool prog_is_subprog(const struct bpf_obj *obj, const struct bpf_prog_obj *prog)
+{
+ return prog->sec_idx == obj->index.text && obj->nr_programs > 1;
+}
+
+static int relocate_calls(struct bpf_obj *obj, struct bpf_prog_obj *prog)
+{
+ struct bpf_prog_obj *subprog;
+ int i, err;
+
+ /* mark all subprogs as not relocated (yet) within the context of
+ * current main program
+ */
+ for (i = 0; i < obj->nr_programs; i++) {
+ subprog = &obj->progs[i];
+ if (!prog_is_subprog(obj, subprog))
+ continue;
+
+ subprog->sub_insn_off = 0;
+ }
+
+ err = reloc_code(obj, prog, prog);
+ if (err)
+ return err;
+ return 0;
+
+}
+
+/* unresolved kfunc call special constant, used also for log fixup logic */
+#define POISON_CALL_KFUNC_BASE 2002000000
+#define POISON_CALL_KFUNC_PFX "2002"
+
+static void poison_kfunc_call(struct bpf_prog_obj *prog, int relo_idx,
+ int insn_idx, struct bpf_insn *insn,
+ int ext_idx, const struct bpf_extern_desc *ext)
+{
+ pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n",
+ prog->name, relo_idx, insn_idx, ext->name);
+
+ /* we turn kfunc call into invalid helper call with identifiable constant */
+ insn->code = BPF_JMP | BPF_CALL;
+ insn->dst_reg = 0;
+ insn->src_reg = 0;
+ insn->off = 0;
+ /* if this instruction is reachable (not a dead code),
+ * verifier will complain with something like:
+ * invalid func unknown#2002000123
+ * where lower 123 is extern index into obj->externs[] array
+ */
+ insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
+}
+
+static int relocate_data(struct bpf_obj *obj, struct bpf_prog_obj *prog)
+{
+ int i;
+
+ for (i = 0; i < prog->nr_reloc; i++) {
+ struct bpf_reloc_desc *relo = &prog->reloc_desc[i];
+ struct bpf_insn *insn = &prog->insn[relo->insn_idx];
+ const struct bpf_map_obj *map;
+ struct bpf_extern_desc *ext;
+
+ switch (relo->type) {
+ case RELO_LD64:
+ map = &obj->maps[relo->map_idx];
+ insn[0].src_reg = BPF_PSEUDO_MAP_FD;
+ insn[0].imm = map->fd;
+ break;
+ case RELO_DATA:
+ map = &obj->maps[relo->map_idx];
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = map->fd;
+ break;
+ case RELO_EXTERN_LD64:
+ ext = &obj->externs[relo->ext_idx];
+ if (ext->type == EXT_KCFG) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+ insn[1].imm = ext->kcfg.data_off;
+ } else /* EXT_KSYM */ {
+ if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
+ insn[0].src_reg = BPF_PSEUDO_BTF_ID;
+ insn[0].imm = ext->ksym.kernel_btf_id;
+ insn[1].imm = ext->ksym.kernel_btf_obj_fd;
+ } else { /* typeless ksyms or unresolved typed ksyms */
+ insn[0].imm = (__u32)ext->ksym.addr;
+ insn[1].imm = ext->ksym.addr >> 32;
+ }
+ }
+ break;
+ case RELO_EXTERN_CALL:
+ ext = &obj->externs[relo->ext_idx];
+ insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
+ if (ext->is_set) {
+ insn[0].imm = ext->ksym.kernel_btf_id;
+ insn[0].off = ext->ksym.btf_fd_idx;
+ } else { /* unresolved weak kfunc call */
+ poison_kfunc_call(prog, i, relo->insn_idx, insn,
+ relo->ext_idx, ext);
+ }
+ break;
+ case RELO_SUBPROG_ADDR:
+ case RELO_CALL:
+ case RELO_CORE:
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int prog_assign_exc_cb(struct bpf_obj *obj, struct bpf_prog_obj *prog)
+{
+ const char *str = "exception_callback:";
+ size_t pfx_len = strlen(str);
+ int i, j, n;
+ const char *name;
+ const struct btf_type *t;
+
+ if (!obj->btf)
+ return 0;
+
+ n = btf_type_cnt(obj->btf);
+ for (i = 1; i < n; i++) {
+ t = btf_type_by_id(obj->btf, i);
+ if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1)
+ continue;
+
+ name = btf_str_by_offset(obj->btf, t->name_off);
+ if (strncmp(name, str, pfx_len) != 0)
+ continue;
+
+ t = btf_type_by_id(obj->btf, t->type);
+ if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
+ pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n",
+ prog->name);
+ return -EINVAL;
+ }
+ if (strcmp(prog->name, btf_str_by_offset(obj->btf, t->name_off)) != 0)
+ continue;
+ /* Multiple callbacks are specified for the same prog,
+ * the verifier will eventually return an error for this
+ * case, hence simply skip appending a subprog.
+ */
+ if (prog->exception_cb_idx >= 0) {
+ prog->exception_cb_idx = -1;
+ break;
+ }
+
+ name += pfx_len;
+ if (str_is_empty(name)) {
+ pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n",
+ prog->name);
+ return -EINVAL;
+ }
+
+ for (j = 0; j < obj->nr_programs; j++) {
+ struct bpf_prog_obj *subprog = &obj->progs[j];
+
+ if (!prog_is_subprog(obj, subprog))
+ continue;
+ if (strcmp(name, subprog->name) != 0)
+ continue;
+ /* Let's see if we already saw a static exception callback with this name */
+ if (prog->exception_cb_idx >= 0) {
+ pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n",
+ prog->name, subprog->name);
+ return -EINVAL;
+ }
+ prog->exception_cb_idx = j;
+ break;
+ }
+
+ if (prog->exception_cb_idx >= 0)
+ continue;
+
+ pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name);
+ return -ENOENT;
+ }
+
+ return 0;
+
+}
+
+static int relocate_object(struct bpf_obj *obj)
+{
+ struct bpf_prog_obj *prog;
+ int i, j, err;
+
+ if (obj->btf)
+ relocate_core(obj);
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->progs[i];
+ for (j = 0; j < prog->nr_reloc; j++) {
+ struct bpf_reloc_desc *relo = &prog->reloc_desc[j];
+ struct bpf_insn *insn = &prog->insn[relo->insn_idx];
+
+ /* mark the insn, so it's recognized by insn_is_pseudo_func() */
+ if (relo->type == RELO_SUBPROG_ADDR)
+ insn[0].src_reg = BPF_PSEUDO_FUNC;
+ }
+ }
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->progs[i];
+ /* sub-program's sub-calls are relocated within the context of
+ * its main program only
+ */
+ if (prog_is_subprog(obj, prog))
+ continue;
+
+ err = relocate_calls(obj, prog);
+ if (err) {
+ pr_warn("prog '%s': failed to relocate calls: %d\n",
+ prog->name, err);
+ return err;
+ }
+
+ err = prog_assign_exc_cb(obj, prog);
+ if (err)
+ return err;
+
+ /* Now, also append exception callback if it has not been done already. */
+ if (prog->exception_cb_idx >= 0) {
+ struct bpf_prog_obj *subprog = &obj->progs[prog->exception_cb_idx];
+
+ /* Calling exception callback directly is disallowed, which the
+ * verifier will reject later. In case it was processed already,
+ * we can skip this step, otherwise for all other valid cases we
+ * have to append exception callback now.
+ */
+ if (subprog->sub_insn_off == 0) {
+ err = append_subprog_code(obj, prog, subprog);
+ if (err)
+ return err;
+ err = reloc_code(obj, prog, subprog);
+ if (err)
+ return err;
+ }
+ }
+ }
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->progs[i];
+ if (prog_is_subprog(obj, prog))
+ continue;
+
+ /* Process data relos for main programs */
+ err = relocate_data(obj, prog);
+ if (err) {
+ pr_warn("prog '%s': failed to relocate data references: %d\n",
+ prog->name, err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static void free_bpf_obj(struct bpf_obj *obj)
{
int i;
@@ -7488,6 +7973,10 @@ static int load_fd(union bpf_attr *attr)
if (err < 0)
goto free;
+ err = relocate_object(obj);
+ if (err < 0)
+ goto free;
+
return obj_f;
free:
free_bpf_obj(obj);
--
2.47.1
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 14/14] bpf: Augment BPF_PROG_LOAD to use in-kernel relocations
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
` (12 preceding siblings ...)
2025-01-09 21:43 ` [PATCH 13/14] bpf: Apply in-kernel bpf instruction relocations Blaise Boscaccy
@ 2025-01-09 21:43 ` Blaise Boscaccy
2025-01-10 18:40 ` [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Alexei Starovoitov
2025-01-30 1:13 ` Cong Wang
15 siblings, 0 replies; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-09 21:43 UTC (permalink / raw)
To: bpf; +Cc: nkapron, teknoraver, roberto.sassu, gregkh, paul, code, flaniel
The basic algorithm here is to allow the user to supply a sysfs entry
corresponding to a previously in-kernel relocated elf object, and a
symbol name that they wish to load. From there the loader ignores any
supplied bpf instruction buffers and relies on the in-kernel
representation. However, maps and other associated file descriptors
passed in from userspace are handled as normal.
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
kernel/bpf/syscall.c | 56 ++++++++++++++++++++++++++++++++++++++------
1 file changed, 49 insertions(+), 7 deletions(-)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ea0401634e752..8159fe75cd359 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2740,9 +2740,13 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
struct bpf_prog *prog, *dst_prog = NULL;
struct btf *attach_btf = NULL;
struct bpf_token *token = NULL;
+ struct bpf_obj *obj = NULL;
+ struct bpf_prog_obj *prog_obj = NULL;
bool bpf_cap;
- int err;
+ int err, i;
char license[128];
+ char symbol_name[32];
+ struct fd loader_fd;
if (CHECK_ATTR(BPF_PROG_LOAD))
return -EINVAL;
@@ -2855,8 +2859,40 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
goto put_token;
}
+ if (attr->prog_loader_fd) {
+ loader_fd = fdget(attr->prog_loader_fd);
+ if (!fd_file(loader_fd)) {
+ err = -EBADF;
+ goto put_token;
+ }
+
+ obj = fd_file(loader_fd)->private_data;
+
+ /* copy eBPF program symbol name from user space */
+ if (strncpy_from_bpfptr(symbol_name,
+ make_bpfptr(attr->symbol_loader_name, uattr.is_kernel),
+ sizeof(symbol_name) - 1) < 0)
+ goto put_token;
+
+ symbol_name[sizeof(symbol_name) - 1] = 0;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ if (strcmp(symbol_name, obj->progs[i].name) == 0) {
+ prog_obj = &obj->progs[i];
+ break;
+ }
+ }
+
+ if (!prog_obj)
+ goto put_token;
+ }
+
/* plain bpf_prog allocation */
- prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
+ if (prog_obj)
+ prog = bpf_prog_alloc(bpf_prog_size(prog_obj->insn_cnt), GFP_USER);
+ else
+ prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
+
if (!prog) {
if (dst_prog)
bpf_prog_put(dst_prog);
@@ -2879,13 +2915,19 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
token = NULL;
prog->aux->user = get_current_user();
- prog->len = attr->insn_cnt;
err = -EFAULT;
- if (copy_from_bpfptr(prog->insns,
- make_bpfptr(attr->insns, uattr.is_kernel),
- bpf_prog_insn_size(prog)) != 0)
- goto free_prog;
+ if (prog_obj) {
+ prog->len = prog_obj->insn_cnt;
+ memcpy(prog->insnsi, prog_obj->insn, prog_obj->insn_cnt * sizeof(struct bpf_insn));
+ } else {
+ prog->len = attr->insn_cnt;
+ if (copy_from_bpfptr(prog->insns,
+ make_bpfptr(attr->insns, uattr.is_kernel),
+ bpf_prog_insn_size(prog)) != 0)
+ goto free_prog;
+ }
+
/* copy eBPF program license from user space */
if (strncpy_from_bpfptr(license,
make_bpfptr(attr->license, uattr.is_kernel),
--
2.47.1
^ permalink raw reply related [flat|nested] 30+ messages in thread* Re: [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
` (13 preceding siblings ...)
2025-01-09 21:43 ` [PATCH 14/14] bpf: Augment BPF_PROG_LOAD to use in-kernel relocations Blaise Boscaccy
@ 2025-01-10 18:40 ` Alexei Starovoitov
2025-01-10 23:27 ` Blaise Boscaccy
2025-01-30 1:13 ` Cong Wang
15 siblings, 1 reply; 30+ messages in thread
From: Alexei Starovoitov @ 2025-01-10 18:40 UTC (permalink / raw)
To: Blaise Boscaccy
Cc: bpf, nkapron, Matteo Croce, Roberto Sassu, Greg Kroah-Hartman,
Paul Moore, code, Francis Laniel, Daniel Borkmann, John Fastabend
On Thu, Jan 9, 2025 at 1:47 PM Blaise Boscaccy
<bboscaccy@linux.microsoft.com> wrote:
>
>
> This is a proof-of-concept, based off of bpf-next-6.13. The
> implementation will need additional work. The goal of this prototype was
> to be able load raw elf object files directly into the kernel and have
> the kernel perform all the necessary instruction rewriting and
> relocation calculations. Having a file descriptor tied to a bpf program
> allowed us to have tighter integration with the existing LSM
> infrastructure. Additionally, it opens the door for signature and provenance
> checking, along with loading programs without a functioning userspace.
>
> The main goal of this RFC is to get some feedback on the overall
> approach and feasibility of this design.
It's not feasible.
libbpf.a is mainly a loader of bpf ELF files.
There is a specific format of ELF files, a convention on section names,
a protocol between LLVM and libbpf, etc.
These things are stable api from libbpf 1.x pov.
There is a chance that they will change in libbpf 2.x.
There are no plans to do so now, but because it's all user space
there is room for changes.
The kernel doesn't have such luxury.
Hence we cannot copy paste libbpf into the kernel and make
it parse the same ELF data, since it will force us to support
this exact format forever.
Hence the design is not feasible.
This was discussed multiple times on the list and at LSFMMBPF, LPC
conferences over the years.
But if the real goal of these patches to:
> open the door for signature and provenance
> checking, along with loading programs without a functioning userspace.
then please take a look at the light skeleton.
There is an existing mechanism to load bpf ELF files without libbpf
and without user space.
Search for 'bpftool gen skeleton -L'.
Also there were prototype patches to add signature checking on
top of the light skeleton,
and long discussions on the list and conferences about 'gate keeper' concept.
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files
2025-01-10 18:40 ` [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Alexei Starovoitov
@ 2025-01-10 23:27 ` Blaise Boscaccy
2025-01-13 17:54 ` Alexei Starovoitov
0 siblings, 1 reply; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-10 23:27 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: bpf, nkapron, Matteo Croce, Roberto Sassu, Greg Kroah-Hartman,
Paul Moore, code, Francis Laniel, Daniel Borkmann, John Fastabend
Alexei Starovoitov <alexei.starovoitov@gmail.com> writes:
> On Thu, Jan 9, 2025 at 1:47 PM Blaise Boscaccy
> <bboscaccy@linux.microsoft.com> wrote:
>>
>>
>> This is a proof-of-concept, based off of bpf-next-6.13. The
>> implementation will need additional work. The goal of this prototype was
>> to be able load raw elf object files directly into the kernel and have
>> the kernel perform all the necessary instruction rewriting and
>> relocation calculations. Having a file descriptor tied to a bpf program
>> allowed us to have tighter integration with the existing LSM
>> infrastructure. Additionally, it opens the door for signature and provenance
>> checking, along with loading programs without a functioning userspace.
>>
>> The main goal of this RFC is to get some feedback on the overall
>> approach and feasibility of this design.
>
> It's not feasible.
>
> libbpf.a is mainly a loader of bpf ELF files.
> There is a specific format of ELF files, a convention on section names,
> a protocol between LLVM and libbpf, etc.
> These things are stable api from libbpf 1.x pov.
> There is a chance that they will change in libbpf 2.x.
> There are no plans to do so now, but because it's all user space
> there is room for changes.
> The kernel doesn't have such luxury.
> Hence we cannot copy paste libbpf into the kernel and make
> it parse the same ELF data, since it will force us to support
> this exact format forever.
> Hence the design is not feasible.
>
Noted.
> This was discussed multiple times on the list and at LSFMMBPF, LPC
> conferences over the years.
>
> But if the real goal of these patches to:
>
>> open the door for signature and provenance
>> checking, along with loading programs without a functioning userspace.
>
> then please take a look at the light skeleton.
> There is an existing mechanism to load bpf ELF files without libbpf
> and without user space.
> Search for 'bpftool gen skeleton -L'.
Our goal is to have verifiable ebpf programs that are portable across
multiple kernels. I looked into light skels, it appears that all the
instruction relocations are calculated during skeleton generation and a
static instruction buffer containing those fixed relocation results is
passed into the kernel? For some relocs, those values would be
deterministic, making that a non-issue. For others that rely on btf data
or kernel symbols those might not be portable anymore.
Would it be amenable to possibly alter the light skeleton generation
code to pass btf and some other metadata into the kernel along with
instructions or are you trying to avoid any sort of fixed dependencies
on anything in the kernel other than the bpf instruction set itself?
-blaise
>
> Also there were prototype patches to add signature checking on
> top of the light skeleton,
> and long discussions on the list and conferences about 'gate keeper' concept.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files
2025-01-10 23:27 ` Blaise Boscaccy
@ 2025-01-13 17:54 ` Alexei Starovoitov
2025-01-14 18:24 ` Blaise Boscaccy
0 siblings, 1 reply; 30+ messages in thread
From: Alexei Starovoitov @ 2025-01-13 17:54 UTC (permalink / raw)
To: Blaise Boscaccy
Cc: bpf, nkapron, Matteo Croce, Roberto Sassu, Greg Kroah-Hartman,
Paul Moore, code, Francis Laniel, Daniel Borkmann, John Fastabend
On Fri, Jan 10, 2025 at 3:27 PM Blaise Boscaccy
<bboscaccy@linux.microsoft.com> wrote:
>
> Alexei Starovoitov <alexei.starovoitov@gmail.com> writes:
>
> > On Thu, Jan 9, 2025 at 1:47 PM Blaise Boscaccy
> > <bboscaccy@linux.microsoft.com> wrote:
> >>
> >>
> >> This is a proof-of-concept, based off of bpf-next-6.13. The
> >> implementation will need additional work. The goal of this prototype was
> >> to be able load raw elf object files directly into the kernel and have
> >> the kernel perform all the necessary instruction rewriting and
> >> relocation calculations. Having a file descriptor tied to a bpf program
> >> allowed us to have tighter integration with the existing LSM
> >> infrastructure. Additionally, it opens the door for signature and provenance
> >> checking, along with loading programs without a functioning userspace.
> >>
> >> The main goal of this RFC is to get some feedback on the overall
> >> approach and feasibility of this design.
> >
> > It's not feasible.
> >
> > libbpf.a is mainly a loader of bpf ELF files.
> > There is a specific format of ELF files, a convention on section names,
> > a protocol between LLVM and libbpf, etc.
> > These things are stable api from libbpf 1.x pov.
> > There is a chance that they will change in libbpf 2.x.
> > There are no plans to do so now, but because it's all user space
> > there is room for changes.
> > The kernel doesn't have such luxury.
> > Hence we cannot copy paste libbpf into the kernel and make
> > it parse the same ELF data, since it will force us to support
> > this exact format forever.
> > Hence the design is not feasible.
> >
>
> Noted.
>
> > This was discussed multiple times on the list and at LSFMMBPF, LPC
> > conferences over the years.
> >
> > But if the real goal of these patches to:
> >
> >> open the door for signature and provenance
> >> checking, along with loading programs without a functioning userspace.
> >
> > then please take a look at the light skeleton.
> > There is an existing mechanism to load bpf ELF files without libbpf
> > and without user space.
> > Search for 'bpftool gen skeleton -L'.
>
> Our goal is to have verifiable ebpf programs that are portable across
> multiple kernels. I looked into light skels, it appears that all the
> instruction relocations are calculated during skeleton generation and a
> static instruction buffer containing those fixed relocation results is
> passed into the kernel? For some relocs, those values would be
> deterministic, making that a non-issue. For others that rely on btf data
> or kernel symbols those might not be portable anymore.
Specifically?
lskel preserves CORE. BTF based relocations are done by the kernel.
> Would it be amenable to possibly alter the light skeleton generation
> code to pass btf and some other metadata into the kernel along with
> instructions or are you trying to avoid any sort of fixed dependencies
> on anything in the kernel other than the bpf instrucion set itself?
BTF is passed in the lskel.
There are few relocation-like things that lskel doesn't support.
One example is __kconfig, but so far there was no request to support that.
This can be added when needs arise.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files
2025-01-13 17:54 ` Alexei Starovoitov
@ 2025-01-14 18:24 ` Blaise Boscaccy
2025-01-24 5:08 ` bpf signing. " Alexei Starovoitov
0 siblings, 1 reply; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-14 18:24 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: bpf, nkapron, Matteo Croce, Roberto Sassu, Greg Kroah-Hartman,
Paul Moore, code, Francis Laniel, Daniel Borkmann, John Fastabend
Alexei Starovoitov <alexei.starovoitov@gmail.com> writes:
> On Fri, Jan 10, 2025 at 3:27 PM Blaise Boscaccy
> <bboscaccy@linux.microsoft.com> wrote:
>>
>> Alexei Starovoitov <alexei.starovoitov@gmail.com> writes:
>>
>> > On Thu, Jan 9, 2025 at 1:47 PM Blaise Boscaccy
>> > <bboscaccy@linux.microsoft.com> wrote:
>> >>
>> >>
>> >> This is a proof-of-concept, based off of bpf-next-6.13. The
>> >> implementation will need additional work. The goal of this prototype was
>> >> to be able load raw elf object files directly into the kernel and have
>> >> the kernel perform all the necessary instruction rewriting and
>> >> relocation calculations. Having a file descriptor tied to a bpf program
>> >> allowed us to have tighter integration with the existing LSM
>> >> infrastructure. Additionally, it opens the door for signature and provenance
>> >> checking, along with loading programs without a functioning userspace.
>> >>
>> >> The main goal of this RFC is to get some feedback on the overall
>> >> approach and feasibility of this design.
>> >
>> > It's not feasible.
>> >
>> > libbpf.a is mainly a loader of bpf ELF files.
>> > There is a specific format of ELF files, a convention on section names,
>> > a protocol between LLVM and libbpf, etc.
>> > These things are stable api from libbpf 1.x pov.
>> > There is a chance that they will change in libbpf 2.x.
>> > There are no plans to do so now, but because it's all user space
>> > there is room for changes.
>> > The kernel doesn't have such luxury.
>> > Hence we cannot copy paste libbpf into the kernel and make
>> > it parse the same ELF data, since it will force us to support
>> > this exact format forever.
>> > Hence the design is not feasible.
>> >
>>
>> Noted.
>>
>> > This was discussed multiple times on the list and at LSFMMBPF, LPC
>> > conferences over the years.
>> >
>> > But if the real goal of these patches to:
>> >
>> >> open the door for signature and provenance
>> >> checking, along with loading programs without a functioning userspace.
>> >
>> > then please take a look at the light skeleton.
>> > There is an existing mechanism to load bpf ELF files without libbpf
>> > and without user space.
>> > Search for 'bpftool gen skeleton -L'.
>>
>> Our goal is to have verifiable ebpf programs that are portable across
>> multiple kernels. I looked into light skels, it appears that all the
>> instruction relocations are calculated during skeleton generation and a
>> static instruction buffer containing those fixed relocation results is
>> passed into the kernel? For some relocs, those values would be
>> deterministic, making that a non-issue. For others that rely on btf data
>> or kernel symbols those might not be portable anymore.
>
> Specifically?
> lskel preservers CORE. BTF based relocations are done by the kernel.
>
It looks like they are done in the kernel and not necessarily by the
kernel? The relocation logic is emitted by emit_relo* functions during
skeleton generation and the ebpf program is responsible for relocating
itself at runtime, correct? Meaning that the same program is going to
appear very different to the kernel if it's loaded via lskel or libbpf?
>> Would it be amenable to possibly alter the light skeleton generation
>> code to pass btf and some other metadata into the kernel along with
>> instructions or are you trying to avoid any sort of fixed dependencies
>> on anything in the kernel other than the bpf instrucion set itself?
>
> BTF is passed in the lskel.
> There are few relocation-like things that lskel doesn't support.
> One example is __kconfig, but so far there was no request to support that.
> This can be added when needs arise.
Yes, I ran into the lskel generator doing fun stuff like:
libbpf: extern (kcfg) 'LINUX_KERNEL_VERSION': set to 0x6080c
Which caused some concern. Is the feature set for the light skeleton
generator and the feature set for libbpf expected to drift, whereas
new features will get added to libbpf but they will get added to the
lskel generator if and only if someone requests support for it?
Ancillary, would there be opposition to passing the symbol table into
the kernel via the light skeleton?
I couldn't find anything tangible related to a 'gate keeper' on the bpf
mailing list and haven't attended the conferences. Are you going to
shoot down all attempts at code signing of eBPF programs in the kernel?
Internally, we want to cryptographically verify all running kernel code
with a proper root of trust. Additionally we've been looking into
NIST-800-172 requirements. That's currently making eBPF a no-go. Root
and userspace are not trusted either in these contexts, making userspace
gate-keeper daemons unworkable.
^ permalink raw reply [flat|nested] 30+ messages in thread
* bpf signing. Re: [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files
2025-01-14 18:24 ` Blaise Boscaccy
@ 2025-01-24 5:08 ` Alexei Starovoitov
2025-01-24 7:05 ` John Fastabend
0 siblings, 1 reply; 30+ messages in thread
From: Alexei Starovoitov @ 2025-01-24 5:08 UTC (permalink / raw)
To: Blaise Boscaccy
Cc: bpf, nkapron, Matteo Croce, Roberto Sassu, Greg Kroah-Hartman,
Paul Moore, code, Francis Laniel, Daniel Borkmann, John Fastabend
On Tue, Jan 14, 2025 at 10:24 AM Blaise Boscaccy
<bboscaccy@linux.microsoft.com> wrote:
>
> It looks like they are done in the kernel and not necessarily by the
> kernel? The relocation logic is emitted by emit_relo* functions during
> skeleton generation and the ebpf program is responsible for relocating
> itself at runtime, correct? Meaning that the same program is going to
> appear very different to the kernel if it's loaded via lskel or libbpf?
Looks like you're reading the code without actually trying to run it.
> >> Would it be amenable to possibly alter the light skeleton generation
> >> code to pass btf and some other metadata into the kernel along with
> >> instructions or are you trying to avoid any sort of fixed dependencies
> >> on anything in the kernel other than the bpf instrucion set itself?
> >
> > BTF is passed in the lskel.
> > There are few relocation-like things that lskel doesn't support.
> > One example is __kconfig, but so far there was no request to support that.
> > This can be added when needs arise.
>
> Yes, I ran into the lskel generator doing fun stuff like:
>
> libbpf: extern (kcfg) 'LINUX_KERNEL_VERSION': set to 0x6080c
>
> Which caused some concern. Is the feature set for the light skeleton
> generator and the feature set for libbpf is expected to drift, whereas
> new features will get added to libbpf but they will get added to the
> lskel generator if and only if someone requests support for it?
Correct.
> Ancillary, would there be opposition to passing the symbol table into
> the kernel via the light skeleton?
Yes, if by "symbol table" you mean ELF symbol table.
> I couldn't find anything tangible related to a 'gate keeper' on the bpf
> mailing list and haven't attended the conferences. Are you going to
> shoot down all attempts at code signing of eBPF programs in the kernel?
gate keeper concept is the sign verification by the kernel.
> Internally, we want to cryptographically verify all running kernel code
> with a proper root of trust. Additionally we've been looking into
> NIST-800-172 requirements. That's currently making eBPF a no-go. Root
> and userspace are not trusted either in these contexts, making userspace
> gate-keeper daemons unworkable.
The idea was to add LSM-like hook in the prog loading path where
"gate keeper" bpf program loaded early during the boot
(without any user space) would validate the signature attached
to lskel and whatever other prog attributes it might need.
KP proposed:
https://lore.kernel.org/bpf/CACYkzJ6xSk_DHO+3JoCYpGrXjFkk9v-LOSWW0=0KLwAj1Gc0SA@mail.gmail.com/
iirc John had the whole design proposal written somewhere,
but I cannot find it now.
John,
can you summarize how gate keeper bpf prog would work?
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: bpf signing. Re: [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files
2025-01-24 5:08 ` bpf signing. " Alexei Starovoitov
@ 2025-01-24 7:05 ` John Fastabend
2025-01-28 22:32 ` Blaise Boscaccy
0 siblings, 1 reply; 30+ messages in thread
From: John Fastabend @ 2025-01-24 7:05 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: Blaise Boscaccy, bpf, nkapron, Matteo Croce, Roberto Sassu,
Greg Kroah-Hartman, Paul Moore, code, Francis Laniel,
Daniel Borkmann
On 2025-01-23 21:08:14, Alexei Starovoitov wrote:
> On Tue, Jan 14, 2025 at 10:24 AM Blaise Boscaccy
> <bboscaccy@linux.microsoft.com> wrote:
> >
> > It looks like they are done in the kernel and not necessarily by the
> > kernel? The relocation logic is emitted by emit_relo* functions during
> > skeleton generation and the ebpf program is responsible for relocating
> > itself at runtime, correct? Meaning that the same program is going to
> > appear very different to the kernel if it's loaded via lskel or libbpf?
>
> Looks like you're reading the code without actually trying to run it.
>
> > >> Would it be amenable to possibly alter the light skeleton generation
> > >> code to pass btf and some other metadata into the kernel along with
> > >> instructions or are you trying to avoid any sort of fixed dependencies
> > >> on anything in the kernel other than the bpf instrucion set itself?
> > >
> > > BTF is passed in the lskel.
> > > There are few relocation-like things that lskel doesn't support.
> > > One example is __kconfig, but so far there was no request to support that.
> > > This can be added when needs arise.
> >
> > Yes, I ran into the lskel generator doing fun stuff like:
> >
> > libbpf: extern (kcfg) 'LINUX_KERNEL_VERSION': set to 0x6080c
> >
> > Which caused some concern. Is the feature set for the light skeleton
> > generator and the feature set for libbpf is expected to drift, whereas
> > new features will get added to libbpf but they will get added to the
> > lskel generator if and only if someone requests support for it?
>
> Correct.
>
> > Ancillary, would there be opposition to passing the symbol table into
> > the kernel via the light skeleton?
>
> Yes, if by "symbol table" you mean ELF symbol table.
>
> > I couldn't find anything tangible related to a 'gate keeper' on the bpf
> > mailing list and haven't attended the conferences. Are you going to
> > shoot down all attempts at code signing of eBPF programs in the kernel?
>
> gate keeper concept is the sign verification by the kernel.
>
> > Internally, we want to cryptographically verify all running kernel code
> > with a proper root of trust. Additionally we've been looking into
> > NIST-800-172 requirements. That's currently making eBPF a no-go. Root
> > and userspace are not trusted either in these contexts, making userspace
> > gate-keeper daemons unworkable.
>
> The idea was to add LSM-like hook in the prog loading path where
> "gate keeper" bpf program loaded early during the boot
> (without any user space) would validate the signature attached
> to lskel and whatever other prog attributes it might need.
>
> KP proposed:
> https://lore.kernel.org/bpf/CACYkzJ6xSk_DHO+3JoCYpGrXjFkk9v-LOSWW0=0KLwAj1Gc0SA@mail.gmail.com/
>
> iirc John had the whole design proposal written somewhere,
> but I cannot find it now.
>
> John,
> can you summarize how gate keeper bpf prog would work?
Sure. The gate keeper can attach at bpf_prog_load time, note there is
already a security hook there we can hook to with the bpf_prog struct
as the only arg. At this point any number of policy about what/who can
load BPF programs can be applied by looking at the struct and context
its being called. For better use of crypto functions we would want this
to be a sleepable program.
Why it needs to be a BPF prog in this model is because I expect the
policy may be very different depending on the env. We have K8s
systems, DPUs, VMs, embedded systems all running BPF and each has
different requirements and different policy metadata.
With BPF/IMA or fsverity infra the caller can be identified by a
hash giving the identity of the loader. This works today.
We can also check a signature of the skel here if needed. Maybe some
kfuncs are still needed (and make it sleepable) I haven't done this
part yet. I found binding identity of the loader to types of programs
is a good starting point. A roster of all BPF programs loaded in a
cluster is doable now. Anyways a kfunc to consume bpf_prog and key
details to return good/bad is probably fine? Or break it down into
the individual ops would be more flexible. This should be enough
to solve the cryptographically verify BPF programs.
There is also an idea that we could provide more metadata about the
program by having the verifier include a summary. One proposed example
was to track helpers/kfuns in use. For example a network program that
can inspect traffic, but not redirect it.
End result is we could build a policy that says these programs can
load these specific BPF programs. And keep those in maps so it can
be updated dynamically on a bunch of running systems. I think you
want the dynamic part so you can have some process to say I'm
adding these new debug programs or new critical security fixes
to the list of allowed BPF programs.
Some other commentary:
Also to be complete a way to load BPF programs in early boot would
reduce/eliminate a window between launched trusted kernel and gate
keeper launch.
Either the gate keeper can ensure it can't be unloaded by also
monitoring those paths or we could just pin a refcnt on it when a
flag is set or it comes from early boot.
Map updates/manipulation can also wreck BPF logic so you will want to
also have the gate keeper track that.
As a first step just making it sleepable and exposing the needed
kfuncs would be relatively easy and get what you need I suspect.
Added the gatekeeper BPF prog at early boot would likely be all
you need?
Thanks,
John
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: bpf signing. Re: [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files
2025-01-24 7:05 ` John Fastabend
@ 2025-01-28 22:32 ` Blaise Boscaccy
0 siblings, 0 replies; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-28 22:32 UTC (permalink / raw)
To: John Fastabend, Alexei Starovoitov
Cc: bpf, nkapron, Matteo Croce, Roberto Sassu, Greg Kroah-Hartman,
Paul Moore, code, Francis Laniel, Daniel Borkmann
John Fastabend <john.fastabend@gmail.com> writes:
> On 2025-01-23 21:08:14, Alexei Starovoitov wrote:
>> On Tue, Jan 14, 2025 at 10:24 AM Blaise Boscaccy
>> <bboscaccy@linux.microsoft.com> wrote:
>> >
>> > It looks like they are done in the kernel and not necessarily by the
>> > kernel? The relocation logic is emitted by emit_relo* functions during
>> > skeleton generation and the ebpf program is responsible for relocating
>> > itself at runtime, correct? Meaning that the same program is going to
>> > appear very different to the kernel if it's loaded via lskel or libbpf?
>>
>> Looks like you're reading the code without actually trying to run it.
>>
>> > >> Would it be amenable to possibly alter the light skeleton generation
>> > >> code to pass btf and some other metadata into the kernel along with
>> > >> instructions or are you trying to avoid any sort of fixed dependencies
>> > >> on anything in the kernel other than the bpf instrucion set itself?
>> > >
>> > > BTF is passed in the lskel.
>> > > There are few relocation-like things that lskel doesn't support.
>> > > One example is __kconfig, but so far there was no request to support that.
>> > > This can be added when needs arise.
>> >
>> > Yes, I ran into the lskel generator doing fun stuff like:
>> >
>> > libbpf: extern (kcfg) 'LINUX_KERNEL_VERSION': set to 0x6080c
>> >
>> > Which caused some concern. Is the feature set for the light skeleton
>> > generator and the feature set for libbpf is expected to drift, whereas
>> > new features will get added to libbpf but they will get added to the
>> > lskel generator if and only if someone requests support for it?
>>
>> Correct.
>>
>> > Ancillary, would there be opposition to passing the symbol table into
>> > the kernel via the light skeleton?
>>
>> Yes, if by "symbol table" you mean ELF symbol table.
>>
>> > I couldn't find anything tangible related to a 'gate keeper' on the bpf
>> > mailing list and haven't attended the conferences. Are you going to
>> > shoot down all attempts at code signing of eBPF programs in the kernel?
>>
>> gate keeper concept is the sign verification by the kernel.
>>
>> > Internally, we want to cryptographically verify all running kernel code
>> > with a proper root of trust. Additionally we've been looking into
>> > NIST-800-172 requirements. That's currently making eBPF a no-go. Root
>> > and userspace are not trusted either in these contexts, making userspace
>> > gate-keeper daemons unworkable.
>>
>> The idea was to add LSM-like hook in the prog loading path where
>> "gate keeper" bpf program loaded early during the boot
>> (without any user space) would validate the signature attached
>> to lskel and whatever other prog attributes it might need.
>>
>> KP proposed:
>> https://lore.kernel.org/bpf/CACYkzJ6xSk_DHO+3JoCYpGrXjFkk9v-LOSWW0=0KLwAj1Gc0SA@mail.gmail.com/
>>
>> iirc John had the whole design proposal written somewhere,
>> but I cannot find it now.
>>
>> John,
>> can you summarize how gate keeper bpf prog would work?
>
>
Hi John,
> Sure. The gate keeper can attach at bpf_prog_load time, note there is
> already a security hook there we can hook to with the bpf_prog struct
> as the only arg. At this point any number of policy about what/who can
> load BPF programs can be applied by looking at the struct and context
> its being called. For better use of crypto functions we would want this
> to be a sleepable program.
>
> Why it needs to be a BPF prog in this model is because I expect the
> policy may be very different depending on the env. We have K8s
> systems, DPUs, VMs, embedded systems all running BPF and each has
> different requirements and different policy metadata.
>
> With BPF/IMA or fsverity infra the caller can be identified by a
> hash giving the identity of the loader. This works today.
>
I'm assuming that you are referring to something akin to
https://github.com/isovalent/bpf-verity
> We can also check a signature of the skel here if needed. Maybe some
> kfuncs are still needed (and make it sleepable) I haven't done this
> part yet. I found binding identity of the loader to types of programs
> is a good starting point. A roster of all BPF programs loaded in a
> cluster is doable now. Anyways a kfunc to consume bpf_prog and key
> details to return good/bad is probably fine? Or break it down into
> the individual ops would be more flexible. This should be enough
> to solve the cryptographically verify BPF programs.
>
I think we can try to make something like that work.
> There is also an idea that we could provide more metadata about the
> program by having the verifier include a summary. One proposed example
> was to track helpers/kfuns in use. For example a network program that
> can inspect traffic, but not redirect it.
>
Sure, signature checks and policy checks are complementary and not
mutually exclusive.
> End result is we could build a policy that says these programs can
> load these specific BPF programs. And keep those in maps so it can
> be updated dynamically on a bunch of running systems. I think you
> want the dynamic part so you can have some process to say I'm
> adding these new debug programs or new critical security fixes
> to the list of allowed BPF programs.
>
> Some other commentary:
>
> Also to be complete a way to load BPF programs in early boot would
> reduce/eliminate a window between launched trusted kernel and gate
> keeper launch.
>
> Either the gate keeper can ensure it can't be unloaded by also
> monitoring those paths or we could just pin a refcnt on it when a
> flag is set or it comes from early boot.
>
We would definitely be a supporter of early boot programs that can be
bundled into a kernel that can't be unloaded or detached. There is
probably some wider usage beyond this as well.
> Map updates/manipulation can also wreck BPF logic so you will want to
> also have the gate keeper track that.
>
> As a first step just making it sleepable and exposing the needed
> kfuncs would be realtively easy and get what you need I suspect.
> Added the gatekeeper BPF prog at early boot would likely be all
> you need?
>
> Thanks,
> John
-blaise
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files
2025-01-09 21:43 [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Blaise Boscaccy
` (14 preceding siblings ...)
2025-01-10 18:40 ` [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files Alexei Starovoitov
@ 2025-01-30 1:13 ` Cong Wang
2025-01-30 19:22 ` Blaise Boscaccy
15 siblings, 1 reply; 30+ messages in thread
From: Cong Wang @ 2025-01-30 1:13 UTC (permalink / raw)
To: Blaise Boscaccy
Cc: bpf, nkapron, teknoraver, roberto.sassu, gregkh, paul, code,
flaniel
Hello Blaise,
On Thu, Jan 09, 2025 at 01:43:42PM -0800, Blaise Boscaccy wrote:
>
> This is a proof-of-concept, based off of bpf-next-6.13. The
> implementation will need additional work. The goal of this prototype was
> to be able to load raw elf object files directly into the kernel and have
> the kernel perform all the necessary instruction rewriting and
> relocation calculations. Having a file descriptor tied to a bpf program
> allowed us to have tighter integration with the existing LSM
> infrastructure. Additionally, it opens the door for signature and provenance
> checking, along with loading programs without a functioning userspace.
>
> The main goal of this RFC is to get some feedback on the overall
> approach and feasibility of this design.
>
> A new subcommand BPF_LOAD_FD is introduced. This subcommand takes a file
> descriptor to an elf object file, along with an array of map fds, and a
> sysfs entry to associate programs and metadata with. The kernel then
> performs all the relocation calculations and instruction rewriting
> inside the kernel. Later BPF_PROG_LOAD can reference this sysfs entry
> and load/attach previously loaded programs by name. Userspace is
> responsible for generating and populating maps.
>
> CO-RE relocation support already existed in the kernel. Support for
> everything else, maps, externs, etc., was added. In the same vein as
> 29db4bea1d10 ("bpf: Prepare relo_core.c for kernel duty.")
> this prototype directly uses code from libbpf.
>
> One of the challenges encountered was having different elf and btf
> abstractions utilized in the kernel vs libbpf. Missing btf functionality
> was ported over to the kernel while trying to minimize the number of
> changes required to the libbpf code. As a result, there is some code
> duplication and obvious refactoring opportunities. Additionally, being
> able to directly share code between userspace and kernelspace in a
> similar fashion to relo_core.c would be a TODO.
I recently became aware of this patchset through Alexei's reference
in another thread, and I apologize for my delayed involvement.
Upon reviewing your proposed changes, I have concerns about the scope
of the kernel modifications. This implementation appears to introduce
substantial code changes to the kernel (estimated at approximately
1,000+ lines, though a git diff stat wasn't provided).
If the primary objective is eBPF program signing, I would like to
propose an alternative approach: a two-phase signing mechanism that
eliminates the need for kernel modifications. My solution leverages
the existing eBPF infrastructure, particularly the BPF LSM framework.
So the fundamental architectural difference between these two approaches
is pretty much kernel-based versus userspace implementation, which has
been extensively discussed and debated within the kernel community.
I have also developed a proof-of-concept implementation, which is
available for review at: https://github.com/congwang/ebpf-2-phase-signing
I welcome your thoughts and feedback on this alternative approach.
Thanks!
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files
2025-01-30 1:13 ` Cong Wang
@ 2025-01-30 19:22 ` Blaise Boscaccy
2025-02-01 22:24 ` Cong Wang
0 siblings, 1 reply; 30+ messages in thread
From: Blaise Boscaccy @ 2025-01-30 19:22 UTC (permalink / raw)
To: Cong Wang
Cc: bpf, nkapron, teknoraver, roberto.sassu, gregkh, paul, code,
flaniel, alexei.starovoitov, daniel, john.fastabend
Cong Wang <xiyou.wangcong@gmail.com> writes:
> Hello Blaise,
>
Hi!
> On Thu, Jan 09, 2025 at 01:43:42PM -0800, Blaise Boscaccy wrote:
>>
>> This is a proof-of-concept, based off of bpf-next-6.13. The
>> implementation will need additional work. The goal of this prototype was
>> to be able load raw elf object files directly into the kernel and have
>> the kernel perform all the necessary instruction rewriting and
>> relocation calculations. Having a file descriptor tied to a bpf program
>> allowed us to have tighter integration with the existing LSM
>> infrastructure. Additionally, it opens the door for signature and provenance
>> checking, along with loading programs without a functioning userspace.
>>
>> The main goal of this RFC is to get some feedback on the overall
>> approach and feasibility of this design.
>>
>> A new subcommand BPF_LOAD_FD is introduced. This subcommand takes a file
>> descriptor to an elf object file, along with an array of map fds, and a
>> sysfs entry to associate programs and metadata with. The kernel then
>> performs all the relocation calculations and instruction rewriting
>> inside the kernel. Later BPF_PROG_LOAD can reference this sysfs entry
>> and load/attach previously loaded programs by name. Userspace is
>> responsible for generating and populating maps.
>>
>> CO-RE relocation support already existed in the kernel. Support for
>> everything else, maps, externs, etc., was added. In the same vein as
>> 29db4bea1d10 ("bpf: Prepare relo_core.c for kernel duty.")
>> this prototype directly uses code from libbpf.
>>
>> One of the challenges encountered was having different elf and btf
>> abstractions utilized in the kernel vs libpf. Missing btf functionality
>> was ported over to the kernel while trying to minimize the number of
>> changes required to the libpf code. As a result, there is some code
>> duplication and obvious refactoring opportunities. Additionally, being
>> able to directly share code between userspace and kernelspace in a
>> similar fashion to relo_core.c would be a TODO.
>
> I recently became aware of this patchset through Alexei's reference
> in another thread, and I apologize for my delayed involvement.
>
> Upon reviewing your proposed changes, I have concerns about the scope
> of the kernel modifications. This implementation appears to introduce
> substantial code changes to the kernel (estimated at approximately
> 1,000+ lines, though a git diff stat wasn't provided).
>
Yes, it ended up way bigger than I anticipated. The ultimate goal of
that was to be able to conditionally compile parts of libbpf directly
into the kernel and unify the btf and elf libraries. That refactoring
work was way out of scope for a PoC.
> If the primary objective is eBPF program signing, I would like to
> propose an alternative approach: a two-phase signing mechanism that
> eliminates the need for kernel modifications. My solution leverages
> the existing eBPF infrastructure, particularly the BPF LSM framework.
> So the fundamental architectural difference between these two approaches
> is pretty much kernel-based versus userspace implementation, which has
> been extensively discussed and debated within the kernel community.
>
Code signing, secure system design and supply-chain attack mitigations
are some active research areas that we are exploring. BPF programs have
some interesting ramifications on those topics. Attacks that were
previously demonstrated in CVE-2021-3444 are an area of interest as
well.
> I have also developed a proof-of-concept implementation, which is
> available for review at: https://github.com/congwang/ebpf-2-phase-signing
>
Sweet, I'll take a look. It sounds super interesting! At a quick
glance, it looks like your approach would probably benefit from John's
suggestions for early-boot un-unloadable bpf programs.
What are your use cases for signature verification if you don't mind me
asking?
> I welcome your thoughts and feedback on this alternative approach.
>
> Thanks!
-blaise
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [POC][RFC][PATCH] bpf: in-kernel bpf relocations on raw elf files
2025-01-30 19:22 ` Blaise Boscaccy
@ 2025-02-01 22:24 ` Cong Wang
0 siblings, 0 replies; 30+ messages in thread
From: Cong Wang @ 2025-02-01 22:24 UTC (permalink / raw)
To: Blaise Boscaccy
Cc: bpf, nkapron, teknoraver, roberto.sassu, gregkh, paul, code,
flaniel, alexei.starovoitov, daniel, john.fastabend
On Thu, Jan 30, 2025 at 11:22:24AM -0800, Blaise Boscaccy wrote:
> Cong Wang <xiyou.wangcong@gmail.com> writes:
>
> > Hello Blaise,
> >
>
> Hi!
>
> > On Thu, Jan 09, 2025 at 01:43:42PM -0800, Blaise Boscaccy wrote:
> >>
> >> This is a proof-of-concept, based off of bpf-next-6.13. The
> >> implementation will need additional work. The goal of this prototype was
> >> to be able load raw elf object files directly into the kernel and have
> >> the kernel perform all the necessary instruction rewriting and
> >> relocation calculations. Having a file descriptor tied to a bpf program
> >> allowed us to have tighter integration with the existing LSM
> >> infrastructure. Additionally, it opens the door for signature and provenance
> >> checking, along with loading programs without a functioning userspace.
> >>
> >> The main goal of this RFC is to get some feedback on the overall
> >> approach and feasibility of this design.
> >>
> >> A new subcommand BPF_LOAD_FD is introduced. This subcommand takes a file
> >> descriptor to an elf object file, along with an array of map fds, and a
> >> sysfs entry to associate programs and metadata with. The kernel then
> >> performs all the relocation calculations and instruction rewriting
> >> inside the kernel. Later BPF_PROG_LOAD can reference this sysfs entry
> >> and load/attach previously loaded programs by name. Userspace is
> >> responsible for generating and populating maps.
> >>
> >> CO-RE relocation support already existed in the kernel. Support for
> >> everything else, maps, externs, etc., was added. In the same vein as
> >> 29db4bea1d10 ("bpf: Prepare relo_core.c for kernel duty.")
> >> this prototype directly uses code from libbpf.
> >>
> >> One of the challenges encountered was having different elf and btf
> >> abstractions utilized in the kernel vs libpf. Missing btf functionality
> >> was ported over to the kernel while trying to minimize the number of
> >> changes required to the libpf code. As a result, there is some code
> >> duplication and obvious refactoring opportunities. Additionally, being
> >> able to directly share code between userspace and kernelspace in a
> >> similar fashion to relo_core.c would be a TODO.
> >
> > I recently became aware of this patchset through Alexei's reference
> > in another thread, and I apologize for my delayed involvement.
> >
> > Upon reviewing your proposed changes, I have concerns about the scope
> > of the kernel modifications. This implementation appears to introduce
> > substantial code changes to the kernel (estimated at approximately
> > 1,000+ lines, though a git diff stat wasn't provided).
> >
>
> Yes, it ended up way bigger than I anticipated. The ultimate goal of
> that was to be able to conditionally compile parts of libbpf directly
> into the kernel and unify the btf and elf libraries. That refactoring
> work was way out of scope for a PoC.
>
> > If the primary objective is eBPF program signing, I would like to
> > propose an alternative approach: a two-phase signing mechanism that
> > eliminates the need for kernel modifications. My solution leverages
> > the existing eBPF infrastructure, particularly the BPF LSM framework.
> > So the fundamental architectural difference between these two approaches
> > is pretty much kernel-based versus userspace implementation, which has
> > been extensively discussed and debated within the kernel community.
> >
>
> Code signing, secure system design and supply-chain attack mitigations
> are some active research areas that we are exploring. BPF programs have
> some interesting ramifications on those topics. Attacks that were
> previously demonstrated in CVE-2021-3444 are an area of interest as
> well.
Thanks for sharing all the information.
>
> > I have also developed a proof-of-concept implementation, which is
> > available for review at: https://github.com/congwang/ebpf-2-phase-signing
> >
>
> Sweet, I'll take a look. It sounds super interesting! At a quick
> glance, it looks like your approach would probably benefit from John's
> suggestions for early-boot un-unloadable bpf programs.
Oh, mine is unloadable, I just used another eBPF program to do the actual
signature verification. If we unload it (like other regular eBPF
programs), the whole signing enforcement is gone.
>
> What are your use cases for signature verification if you don't mind me
> asking?
>
Sure. To be honest, our use case is actually simpler than 2-phase
signing. What we wanted is merely a centralized way to manage all the
_internal_ eBPF programs within our data center, therefore, signing the
original eBPF binary is probably sufficient because we trust our own
eBPF programs and we just want to use signing as an approval. It should
not be hard to modify my 2-phase signing to just 1-phase (by skipping
the 2nd one).
The reason why I proposed 2-phase signing is that I found that in the
past there were multiple attempts in the community trying to solve this
signing problem, so I looked into it a bit deeper and wanted to see if
I could find a solution to benefit the whole community (and ourselves
too).
Thanks!
^ permalink raw reply [flat|nested] 30+ messages in thread