From: Pingfan Liu <piliu@redhat.com>
To: bpf@vger.kernel.org
Cc: Pingfan Liu <piliu@redhat.com>,
Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
John Fastabend <john.fastabend@gmail.com>,
Andrii Nakryiko <andrii@kernel.org>,
Martin KaFai Lau <martin.lau@linux.dev>,
Eduard Zingerman <eddyz87@gmail.com>, Song Liu <song@kernel.org>,
Yonghong Song <yonghong.song@linux.dev>,
Jeremy Linton <jeremy.linton@arm.com>,
Catalin Marinas <catalin.marinas@arm.com>,
Will Deacon <will@kernel.org>, Ard Biesheuvel <ardb@kernel.org>,
Simon Horman <horms@kernel.org>,
Gerd Hoffmann <kraxel@redhat.com>,
Vitaly Kuznetsov <vkuznets@redhat.com>,
Philipp Rudo <prudo@redhat.com>, Viktor Malik <vmalik@redhat.com>,
Jan Hendrik Farr <kernel@jfarr.cc>, Baoquan He <bhe@redhat.com>,
Dave Young <dyoung@redhat.com>,
Andrew Morton <akpm@linux-foundation.org>,
kexec@lists.infradead.org, KP Singh <kpsingh@kernel.org>,
Stanislav Fomichev <sdf@fomichev.me>, Hao Luo <haoluo@google.com>,
Jiri Olsa <jolsa@kernel.org>
Subject: [PATCHv3 3/9] bpf: Introduce bpf_copy_to_kernel() to buffer the content from bpf-prog
Date: Thu, 29 May 2025 12:17:38 +0800 [thread overview]
Message-ID: <20250529041744.16458-4-piliu@redhat.com> (raw)
In-Reply-To: <20250529041744.16458-1-piliu@redhat.com>
In the secure kexec_file_load case, the buffer which holds the kernel
image is invisible to userspace.
The common data flow in the bpf scheme is from the kernel to the bpf-prog.
In the case of kexec_file_load, the kexec component needs to buffer the
result parsed by the bpf-prog (the opposite of the usual direction) for the
next stage of parsing. bpf_copy_to_kernel() makes this opposite data flow
possible. A bpf-prog can publish the address of the parsed payload to the
kernel, and the latter can copy it for future use.
Signed-off-by: Pingfan Liu <piliu@redhat.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Eduard Zingerman <eddyz87@gmail.com>
Cc: Song Liu <song@kernel.org>
Cc: Yonghong Song <yonghong.song@linux.dev>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Stanislav Fomichev <sdf@fomichev.me>
Cc: Hao Luo <haoluo@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
To: bpf@vger.kernel.org
---
include/linux/bpf.h | 23 +++++
kernel/bpf/Makefile | 2 +-
kernel/bpf/helpers.c | 2 +
kernel/bpf/helpers_carrier.c | 194 +++++++++++++++++++++++++++++++++++
4 files changed, 220 insertions(+), 1 deletion(-)
create mode 100644 kernel/bpf/helpers_carrier.c
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3f0cc89c0622c..104974a6d18cb 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -3568,4 +3568,27 @@ static inline bool bpf_is_subprog(const struct bpf_prog *prog)
return prog->aux->func_idx != 0;
}
+/*
+ * Kernel-side copy of a buffer handed over by a bpf-prog through
+ * bpf_copy_to_kernel(). The object is reference counted and released
+ * with mem_range_result_put().
+ */
+struct mem_range_result {
+	/* reference count; the final put queues the deferred free */
+	struct kref ref;
+	/* callback head used to defer the free */
+	struct rcu_head rcu;
+	/* the copied payload */
+	char *buf;
+	/* both sizes are set to the copied length by bpf_copy_to_kernel() */
+	u32 buf_sz;
+	u32 data_sz;
+	/* true: @buf is kmalloc-ed, false: @buf is vmalloc-ed */
+	bool kmalloc;
+	/* initialised to 0 by bpf_copy_to_kernel() */
+	int status;
+	/* memory cgroup pinned until the object is freed */
+	struct mem_cgroup *memcg;
+};
+/* Drop one reference on @result. Returns 0, or -EINVAL when @result is NULL. */
+int mem_range_result_put(struct mem_range_result *result);
+
+/*
+ * Listener callback. bpf_copy_to_kernel() invokes it with the listener
+ * name and the freshly copied data, and returns its return value.
+ */
+typedef int (*resource_handler)(const char *name, struct mem_range_result *r);
+
+/* Registration record passed to register_carrier_listener(). */
+struct carrier_listener {
+ /* key matched against the @name argument of bpf_copy_to_kernel() */
+ char *name;
+ /* true: the copy is kmalloc-ed, false: it is vmalloc-ed */
+ bool kmalloc;
+ /* callback that receives the copied data */
+ resource_handler handler;
+};
+
+/*
+ * Add @listener to the table of named listeners. Fails with -EINVAL when
+ * the name is missing, -ENOMEM on allocation failure and -EBUSY when the
+ * name is already registered.
+ */
+int register_carrier_listener(struct carrier_listener *listener);
+/* Remove the listener registered under @str. Returns -EINVAL if none is found. */
+int unregister_carrier_listener(char *str);
#endif /* _LINUX_BPF_H */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 70502f038b921..d1f1f50e23cc8 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
endif
CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy)
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o helpers_carrier.o tnum.o log.o token.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index e3a2662f4e336..1f4284e58400b 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -3200,6 +3200,8 @@ BTF_KFUNCS_START(generic_btf_ids)
#ifdef CONFIG_CRASH_DUMP
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
+BTF_ID_FLAGS(func, bpf_mem_range_result_put, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_copy_to_kernel, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
diff --git a/kernel/bpf/helpers_carrier.c b/kernel/bpf/helpers_carrier.c
new file mode 100644
index 0000000000000..c4e45fdf0ebb8
--- /dev/null
+++ b/kernel/bpf/helpers_carrier.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/bpf.h>
+#include <linux/bpf-cgroup.h>
+#include <linux/cgroup.h>
+#include <linux/rcupdate.h>
+#include <linux/hashtable.h>
+#include <linux/jhash.h>
+#include <linux/mutex.h>
+#include <linux/kref.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+
+
+/* One registered listener, stored in the str_listeners hash table. */
+struct str_listener {
+ /* linkage into the hash bucket */
+ struct hlist_node node;
+ /* private copy of the listener name; also the hash key */
+ char *str;
+ /* callback invoked by bpf_copy_to_kernel() */
+ resource_handler handler;
+ /* true: copies for this listener are kmalloc-ed, false: vmalloc-ed */
+ bool kmalloc;
+};
+
+/*
+ * SRCU domain. bpf_copy_to_kernel() looks listeners up inside its read
+ * side, and mem_range_result objects are freed through call_srcu() on it.
+ */
+DEFINE_STATIC_SRCU(srcu);
+/* held while listeners are added to or removed from the table */
+static DEFINE_MUTEX(str_listeners_mutex);
+/* listeners keyed by jhash() of their name; 8 hash bits */
+static DEFINE_HASHTABLE(str_listeners, 8);
+
+/*
+ * find_listener - look up a registered listener by name
+ * @str: NUL-terminated listener name, may be NULL
+ *
+ * The caller must either be inside an SRCU read-side critical section on
+ * @srcu or hold str_listeners_mutex.
+ *
+ * Return: the matching entry, or NULL when @str is NULL or not registered.
+ */
+static struct str_listener *find_listener(const char *str)
+{
+	struct str_listener *item;
+	unsigned int hash;
+
+	/* strlen(NULL) would oops; treat a missing name as "not found" */
+	if (!str)
+		return NULL;
+
+	hash = jhash(str, strlen(str), 0);
+
+	/*
+	 * bpf_copy_to_kernel() calls this under srcu_read_lock() only, so
+	 * the bucket has to be walked with the RCU-aware iterator. The
+	 * condition tells lockdep which protections are acceptable.
+	 */
+	hash_for_each_possible_rcu(str_listeners, item, node, hash,
+				   srcu_read_lock_held(&srcu) ||
+				   lockdep_is_held(&str_listeners_mutex)) {
+		if (strcmp(item->str, str) == 0)
+			return item;
+	}
+	return NULL;
+}
+
+/*
+ * Deferred destructor, run as the SRCU callback queued by
+ * __mem_range_result_put(). Frees the payload and the descriptor with the
+ * descriptor's memcg active, then drops the memcg reference.
+ */
+static void __mem_range_result_free(struct rcu_head *rcu)
+{
+	struct mem_range_result *res;
+	struct mem_cgroup *owner, *saved;
+	char *payload;
+
+	res = container_of(rcu, struct mem_range_result, rcu);
+	/* read everything needed later before the descriptor is freed */
+	owner = res->memcg;
+	payload = res->buf;
+
+	saved = set_active_memcg(owner);
+	if (likely(payload != NULL)) {
+		/* the free routine has to match the allocator that was used */
+		if (!res->kmalloc)
+			vfree(payload);
+		else
+			kfree(payload);
+	}
+	kfree(res);
+	set_active_memcg(saved);
+
+	mem_cgroup_put(owner);
+}
+
+/* kref release callback: queue the real free behind an SRCU grace period. */
+static void __mem_range_result_put(struct kref *kref)
+{
+	struct mem_range_result *res =
+		container_of(kref, struct mem_range_result, ref);
+	struct rcu_head *head = &res->rcu;
+
+	call_srcu(&srcu, head, __mem_range_result_free);
+}
+
+/*
+ * mem_range_result_put - drop one reference on @result
+ *
+ * Return: 0 after the reference has been dropped, -EINVAL (and an error
+ * message in the log) when @result is NULL.
+ */
+int mem_range_result_put(struct mem_range_result *result)
+{
+	if (result) {
+		kref_put(&result->ref, __mem_range_result_put);
+		return 0;
+	}
+
+	pr_err("%s, receive invalid range\n", __func__);
+	return -EINVAL;
+}
+
+/* kfunc wrapper that exposes mem_range_result_put() to bpf programs. */
+__bpf_kfunc int bpf_mem_range_result_put(struct mem_range_result *result)
+{
+	int rc = mem_range_result_put(result);
+
+	return rc;
+}
+
+/**
+ * bpf_copy_to_kernel - hand a kernel-side copy of a buffer to a listener
+ * @name: name of a listener added with register_carrier_listener()
+ * @buf: kernel address of the data to copy
+ * @size: number of bytes to copy, must be positive
+ *
+ * Looks up the listener registered under @name, copies @size bytes from
+ * @buf into a kmalloc-ed or vmalloc-ed buffer (as the listener requested),
+ * wraps the copy in a struct mem_range_result and passes it to the
+ * listener's handler. The reference held by this function is dropped as
+ * soon as the handler returns, so a handler that wants to keep the result
+ * presumably has to take its own reference on it.
+ *
+ * Return: the handler's return value; -EINVAL when @size is not positive,
+ * no listener is registered under @name or @buf cannot be read; -ENOMEM
+ * on allocation failure.
+ */
+__bpf_kfunc int bpf_copy_to_kernel(const char *name, char *buf, int size)
+{
+	struct mem_range_result *range;
+	struct mem_cgroup *memcg, *old_memcg;
+	struct str_listener *item;
+	resource_handler handler;
+	bool use_kmalloc;
+	char *kbuf;
+	int id, ret;
+
+	/* a negative size would turn into a huge size_t in the allocators */
+	if (size <= 0)
+		return -EINVAL;
+
+	id = srcu_read_lock(&srcu);
+	item = find_listener(name);
+	if (!item) {
+		srcu_read_unlock(&srcu, id);
+		return -EINVAL;
+	}
+	/*
+	 * Snapshot everything needed from @item while still inside the
+	 * read-side section; @item must not be dereferenced after the
+	 * unlock below.
+	 * NOTE(review): @handler is still invoked outside the read-side
+	 * section, so the listener's owner has to keep the handler code
+	 * alive until in-flight calls have finished -- confirm with callers.
+	 */
+	use_kmalloc = item->kmalloc;
+	handler = item->handler;
+	srcu_read_unlock(&srcu, id);
+
+	memcg = get_mem_cgroup_from_current();
+	old_memcg = set_active_memcg(memcg);
+
+	range = kmalloc(sizeof(*range), GFP_KERNEL);
+	if (!range) {
+		pr_err("fail to allocate mem_range_result\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (use_kmalloc)
+		kbuf = kmalloc(size, GFP_KERNEL | __GFP_ACCOUNT);
+	else
+		kbuf = __vmalloc(size, GFP_KERNEL | __GFP_ACCOUNT);
+	if (!kbuf) {
+		ret = -ENOMEM;
+		goto err_free_range;
+	}
+
+	/* @buf comes from a bpf-prog; tolerate a faulting address */
+	if (unlikely(copy_from_kernel_nofault(kbuf, buf, size) < 0)) {
+		ret = -EINVAL;
+		goto err_free_buf;
+	}
+
+	kref_init(&range->ref);
+	range->kmalloc = use_kmalloc;
+	range->buf = kbuf;
+	range->buf_sz = size;
+	range->data_sz = size;
+	range->status = 0;
+	/*
+	 * Extra memcg reference owned by @range and dropped when @range is
+	 * freed. A reference is already held here, so the tryget is
+	 * expected to succeed.
+	 */
+	range->memcg = memcg;
+	mem_cgroup_tryget(memcg);
+
+	ret = handler(name, range);
+	mem_range_result_put(range);
+	goto out;
+
+err_free_buf:
+	if (use_kmalloc)
+		kfree(kbuf);
+	else
+		vfree(kbuf);
+err_free_range:
+	kfree(range);
+out:
+	set_active_memcg(old_memcg);
+	mem_cgroup_put(memcg);
+	return ret;
+}
+
+/**
+ * register_carrier_listener - register a named consumer of bpf-prog data
+ * @listener: name, buffer type and handler of the new listener
+ *
+ * The name is duplicated, so @listener does not have to outlive the call.
+ *
+ * Return: 0 on success; -EINVAL when @listener, its name or its handler is
+ * NULL; -ENOMEM on allocation failure; -EBUSY when the name is already
+ * registered.
+ */
+int register_carrier_listener(struct carrier_listener *listener)
+{
+	struct str_listener *item;
+	unsigned int hash;
+	int ret = 0;
+
+	/* a NULL handler would be called blindly by bpf_copy_to_kernel() */
+	if (!listener || !listener->name || !listener->handler)
+		return -EINVAL;
+
+	item = kmalloc(sizeof(*item), GFP_KERNEL);
+	if (!item)
+		return -ENOMEM;
+	item->str = kstrdup(listener->name, GFP_KERNEL);
+	if (!item->str) {
+		kfree(item);
+		return -ENOMEM;
+	}
+	item->handler = listener->handler;
+	item->kmalloc = listener->kmalloc;
+	hash = jhash(item->str, strlen(item->str), 0);
+
+	mutex_lock(&str_listeners_mutex);
+	if (find_listener(item->str))
+		ret = -EBUSY;
+	else
+		/* publish with the RCU variant: lookups run under SRCU only */
+		hash_add_rcu(str_listeners, &item->node, hash);
+	mutex_unlock(&str_listeners_mutex);
+
+	if (ret) {
+		kfree(item->str);
+		kfree(item);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(register_carrier_listener);
+
+/**
+ * unregister_carrier_listener - remove a listener and free its entry
+ * @str: name the listener was registered under
+ *
+ * Waits for an SRCU grace period before freeing the entry, so it may
+ * sleep and must not be called from inside an SRCU read-side critical
+ * section on @srcu.
+ *
+ * Return: 0 on success, -EINVAL when @str is NULL or not registered.
+ */
+int unregister_carrier_listener(char *str)
+{
+	struct str_listener *item;
+
+	if (!str)
+		return -EINVAL;
+
+	mutex_lock(&str_listeners_mutex);
+	item = find_listener(str);
+	if (item)
+		hash_del_rcu(&item->node);
+	mutex_unlock(&str_listeners_mutex);
+
+	if (!item)
+		return -EINVAL;
+
+	/*
+	 * bpf_copy_to_kernel() may still be reading the entry under
+	 * srcu_read_lock(); wait for those readers before freeing it.
+	 */
+	synchronize_srcu(&srcu);
+	kfree(item->str);
+	kfree(item);
+	return 0;
+}
+EXPORT_SYMBOL(unregister_carrier_listener);
+
--
2.49.0
next prev parent reply other threads:[~2025-05-29 4:19 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-29 4:17 [PATCHv3 0/9] kexec: Use BPF lskel to enable kexec to load PE format boot image Pingfan Liu
2025-05-29 4:17 ` [PATCHv3 1/9] kexec_file: Make kexec_image_load_default global visible Pingfan Liu
2025-05-29 4:17 ` [PATCHv3 2/9] lib/decompress: Keep decompressor when CONFIG_KEXEC_PE_IMAGE Pingfan Liu
2025-05-29 4:17 ` Pingfan Liu [this message]
2025-05-29 11:48 ` [PATCHv3 3/9] bpf: Introduce bpf_copy_to_kernel() to buffer the content from bpf-prog kernel test robot
2025-06-25 18:10 ` Philipp Rudo
2025-05-29 4:17 ` [PATCHv3 4/9] bpf: Introduce decompressor kfunc Pingfan Liu
2025-05-29 12:31 ` kernel test robot
2025-05-29 4:17 ` [PATCHv3 5/9] kexec: Introduce kexec_pe_image to parse and load PE file Pingfan Liu
2025-06-25 18:09 ` Philipp Rudo
2025-06-30 13:45 ` Pingfan Liu
2025-07-02 9:17 ` Philipp Rudo
2025-07-03 1:17 ` Pingfan Liu
2025-05-29 4:17 ` [PATCHv3 6/9] kexec: Integrate with the introduced bpf kfuncs Pingfan Liu
2025-05-29 4:17 ` [PATCHv3 7/9] kexec: Introduce a bpf-prog lskel to parse PE file Pingfan Liu
2025-05-29 4:17 ` [PATCHv3 8/9] kexec: Integrate bpf light skeleton to load zboot image Pingfan Liu
2025-06-25 18:10 ` Philipp Rudo
2025-06-30 12:40 ` Pingfan Liu
2025-05-29 4:17 ` [PATCHv3 9/9] arm64/kexec: Add PE image format support Pingfan Liu
2025-05-29 15:34 ` kernel test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250529041744.16458-4-piliu@redhat.com \
--to=piliu@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=andrii@kernel.org \
--cc=ardb@kernel.org \
--cc=ast@kernel.org \
--cc=bhe@redhat.com \
--cc=bpf@vger.kernel.org \
--cc=catalin.marinas@arm.com \
--cc=daniel@iogearbox.net \
--cc=dyoung@redhat.com \
--cc=eddyz87@gmail.com \
--cc=haoluo@google.com \
--cc=horms@kernel.org \
--cc=jeremy.linton@arm.com \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kernel@jfarr.cc \
--cc=kexec@lists.infradead.org \
--cc=kpsingh@kernel.org \
--cc=kraxel@redhat.com \
--cc=martin.lau@linux.dev \
--cc=prudo@redhat.com \
--cc=sdf@fomichev.me \
--cc=song@kernel.org \
--cc=vkuznets@redhat.com \
--cc=vmalik@redhat.com \
--cc=will@kernel.org \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox