From: Pingfan Liu <piliu@redhat.com>
To: kexec@lists.infradead.org
Cc: Pingfan Liu <piliu@redhat.com>,
"David S. Miller" <davem@davemloft.net>,
Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
John Fastabend <john.fastabend@gmail.com>,
Andrii Nakryiko <andrii@kernel.org>,
Martin KaFai Lau <martin.lau@linux.dev>,
Eduard Zingerman <eddyz87@gmail.com>, Song Liu <song@kernel.org>,
Yonghong Song <yonghong.song@linux.dev>,
Jeremy Linton <jeremy.linton@arm.com>,
Catalin Marinas <catalin.marinas@arm.com>,
Will Deacon <will@kernel.org>, Ard Biesheuvel <ardb@kernel.org>,
Simon Horman <horms@kernel.org>,
Gerd Hoffmann <kraxel@redhat.com>,
Vitaly Kuznetsov <vkuznets@redhat.com>,
Philipp Rudo <prudo@redhat.com>, Viktor Malik <vmalik@redhat.com>,
Jan Hendrik Farr <kernel@jfarr.cc>, Baoquan He <bhe@redhat.com>,
Dave Young <dyoung@redhat.com>,
Andrew Morton <akpm@linux-foundation.org>,
bpf@vger.kernel.org, systemd-devel@lists.freedesktop.org,
linux-kernel@vger.kernel.org
Subject: [PATCHv7 04/13] kexec_file: Implement decompress method for parser
Date: Sun, 22 Mar 2026 09:43:53 +0800 [thread overview]
Message-ID: <20260322014402.8815-5-piliu@redhat.com> (raw)
In-Reply-To: <20260322014402.8815-1-piliu@redhat.com>
On arm64, there is no boot-time decompression for the kernel image.
Therefore, when a compressed kernel image is loaded, it must be
decompressed.
It is impractical to implement the complex decompression methods in BPF
bytecode. However, decompression routines already exist in the kernel.
This patch lets the BPF parser hand compressed data to the kernel, which
decompresses it with those existing routines.
Signed-off-by: Pingfan Liu <piliu@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Philipp Rudo <prudo@redhat.com>
To: kexec@lists.infradead.org
---
kernel/Kconfig.kexec | 2 +-
kernel/kexec_bpf_loader.c | 236 ++++++++++++++++++++++++++++++++++++++
2 files changed, 237 insertions(+), 1 deletion(-)
diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec
index 0c5d619820bcd..dbfdf34a78aa0 100644
--- a/kernel/Kconfig.kexec
+++ b/kernel/Kconfig.kexec
@@ -49,7 +49,7 @@ config KEXEC_FILE
config KEXEC_BPF
bool "Enable bpf-prog to parse the kexec image"
depends on KEXEC_FILE
- depends on DEBUG_INFO_BTF && BPF_SYSCALL
+ depends on DEBUG_INFO_BTF && BPF_SYSCALL && KEEP_DECOMPRESSOR
help
This is a feature to run bpf section inside a kexec image file, which
parses the image properly and help kernel set up kexec boot protocol
diff --git a/kernel/kexec_bpf_loader.c b/kernel/kexec_bpf_loader.c
index bd1800a767824..c79fb777d51bf 100644
--- a/kernel/kexec_bpf_loader.c
+++ b/kernel/kexec_bpf_loader.c
@@ -21,6 +21,7 @@
#include <asm/byteorder.h>
#include <asm/image.h>
#include <asm/memory.h>
+#include <linux/decompress/generic.h>
#include "kexec_internal.h"
/* Load a ELF */
@@ -73,8 +74,243 @@ static int __init kexec_bpf_prog_run_init(void)
}
late_initcall(kexec_bpf_prog_run_init);
+/*
+ * Command codes carried in cmd_hdr.cmd.
+ * KEXEC_BPF_CMD_DONE makes the parser callback set ctx->parsed;
+ * KEXEC_BPF_CMD_DECOMPRESS makes it decompress the payload.
+ */
+#define KEXEC_BPF_CMD_INVALID 0x0
+#define KEXEC_BPF_CMD_DONE 0x1
+#define KEXEC_BPF_CMD_DECOMPRESS 0x2
+
+/*
+ * Sub-command codes carried in cmd_hdr.subcmd. For a decompress command
+ * they select what the decompressed data is used as; only
+ * KEXEC_BPF_SUBCMD_KERNEL is handled by the code shown in this patch.
+ */
+#define KEXEC_BPF_SUBCMD_INVALID 0x0
+#define KEXEC_BPF_SUBCMD_KERNEL 0x1
+#define KEXEC_BPF_SUBCMD_INITRD 0x2
+#define KEXEC_BPF_SUBCMD_CMDLINE 0x3
+
+/*
+ * Presumably the values of cmd_hdr.pipeline_flag; nothing in the code
+ * shown here reads them -- TODO confirm against the later patches.
+ */
+#define KEXEC_BPF_PIPELINE_INVALID 0x0
+#define KEXEC_BPF_PIPELINE_FILL 0x1
+
+/*
+ * Header of a command placed at the start of the parser buffer; the
+ * payload bytes follow it immediately. The struct is packed, so there is
+ * no padding between the fields.
+ */
+struct cmd_hdr {
+ /* one of KEXEC_BPF_CMD_* */
+ uint16_t cmd;
+ /* one of KEXEC_BPF_SUBCMD_* */
+ uint8_t subcmd;
+ /* presumably one of KEXEC_BPF_PIPELINE_*; not read by the code shown here */
+ uint8_t pipeline_flag;
+ /* sizeof(chunks) + sizeof(all data) */
+ uint32_t payload_len;
+ /* presumably always 0 for now, as chunks are reserved for extension -- TODO confirm */
+ uint16_t num_chunks;
+} __packed;
+
+/*
+ * Reserved for extension: descriptor of one chunk inside a payload.
+ * Not referenced by the code shown in this patch.
+ */
+struct cmd_chunk {
+ /* chunk type; no values are defined in the code shown here */
+ uint16_t type;
+ /* presumably the chunk length in bytes -- TODO confirm */
+ uint32_t len;
+} __packed;
+
+
+/* Max decompressed size is capped at 512M */
+#define MAX_UNCOMPRESSED_BUF_SIZE (1 << 29)
+/*
+ * Size of one staging chunk (8M); decompressed output is collected in
+ * chunks of this size until the cap above is reached.
+ */
+#define CHUNK_SIZE (1 << 23)
+
+/*
+ * Bookkeeping for the chunks that collect decompressed output before the
+ * chunks are merged into one contiguous buffer.
+ */
+struct decompress_mem_allocator {
+ /* base address of the active (most recently allocated) chunk */
+ void *chunk_start;
+ /* size in bytes of every chunk */
+ unsigned int chunk_size;
+ /* next write position inside the active chunk */
+ void *chunk_cur;
+ /* next free slot in chunk_base_addr, i.e. the number of chunks allocated */
+ unsigned int next_idx;
+ /* table holding the base address of each allocated chunk */
+ char **chunk_base_addr;
+};
+
+/*
+ * This global allocator for decompression is protected by kexec lock.
+ *
+ * It is file-scope state because the flush() callback only receives a
+ * buffer and a length, so it has no argument through which to reach it.
+ */
+static struct decompress_mem_allocator dcmpr_allocator;
+
+/*
+ * Allocate one more chunk and make it the active one.
+ *
+ * Returns the base address of the new chunk, or NULL when the chunks
+ * already allocated reach MAX_UNCOMPRESSED_BUF_SIZE or when the
+ * allocation itself fails. On failure the allocator state is untouched.
+ */
+static char *allocate_chunk_memory(void)
+{
+	struct decompress_mem_allocator *alloc = &dcmpr_allocator;
+	char *chunk = NULL;
+
+	/* Only allocate while the total stays below the cap */
+	if (likely(alloc->next_idx * alloc->chunk_size < MAX_UNCOMPRESSED_BUF_SIZE))
+		chunk = __vmalloc(alloc->chunk_size, GFP_KERNEL | __GFP_ACCOUNT);
+
+	if (chunk) {
+		/* Record the chunk in the table and switch writes over to it */
+		alloc->chunk_base_addr[alloc->next_idx] = chunk;
+		alloc->next_idx++;
+		alloc->chunk_cur = chunk;
+		alloc->chunk_start = chunk;
+	}
+
+	return chunk;
+}
+
+/*
+ * Concatenate every chunk into one freshly allocated contiguous buffer.
+ *
+ * All chunks except the last are copied in full; the last one contributes
+ * only the bytes written so far. Each chunk is freed, and its table slot
+ * cleared, as soon as it has been copied.
+ *
+ * On success returns 0 and hands the buffer and its length back through
+ * @out and @size. Returns -ENOMEM, with the chunks left untouched, when
+ * the destination cannot be allocated.
+ */
+static int merge_decompressed_data(struct decompress_mem_allocator *a,
+		char **out, unsigned long *size)
+{
+	unsigned int tail_len = a->chunk_cur - a->chunk_start;
+	unsigned int last = a->next_idx - 1;
+	unsigned long total = last * a->chunk_size + tail_len;
+	unsigned int idx;
+	char *merged, *wr;
+
+	merged = __vmalloc(total, GFP_KERNEL | __GFP_ACCOUNT);
+	if (!merged)
+		return -ENOMEM;
+
+	wr = merged;
+	for (idx = 0; idx <= last; idx++) {
+		/* Only the final chunk may be partially filled */
+		unsigned int n = (idx == last) ? tail_len : a->chunk_size;
+
+		memcpy(wr, a->chunk_base_addr[idx], n);
+		wr += n;
+		vfree(a->chunk_base_addr[idx]);
+		a->chunk_base_addr[idx] = NULL;
+	}
+
+	*size = total;
+	*out = merged;
+
+	return 0;
+}
+
+/*
+ * Prepare @a for one decompression run.
+ *
+ * Allocates the chunk address table, sized for as many chunks of
+ * @chunk_size as fit into MAX_UNCOMPRESSED_BUF_SIZE, plus the first
+ * chunk, which becomes the active one.
+ *
+ * Returns 0 on success or -ENOMEM if either allocation fails.
+ */
+static int decompress_mem_allocator_init(
+	struct decompress_mem_allocator *a,
+	unsigned int chunk_size)
+{
+	unsigned long table_sz = (MAX_UNCOMPRESSED_BUF_SIZE / chunk_size) * sizeof(void *);
+	char *first;
+
+	a->chunk_base_addr = __vmalloc(table_sz, GFP_KERNEL | __GFP_ACCOUNT);
+	if (!a->chunk_base_addr)
+		return -ENOMEM;
+
+	/* The first chunk is allocated up front so flush() can write at once */
+	first = __vmalloc(chunk_size, GFP_KERNEL | __GFP_ACCOUNT);
+	if (!first)
+		goto free_table;
+
+	a->chunk_size = chunk_size;
+	a->chunk_base_addr[0] = first;
+	a->next_idx = 1;
+	a->chunk_cur = first;
+	a->chunk_start = first;
+	return 0;
+
+free_table:
+	vfree(a->chunk_base_addr);
+	return -ENOMEM;
+}
+
+/*
+ * Release everything still owned by @a: each chunk recorded in the table
+ * (slots already cleared by the merge step are NULL, which vfree()
+ * accepts) and the table itself.
+ *
+ * The allocator is file-scope state that outlives the call, so every
+ * pointer is reset afterwards; a stale use or a second fini then sees
+ * NULL / zero chunks instead of freed memory.
+ */
+static void decompress_mem_allocator_fini(struct decompress_mem_allocator *a)
+{
+	unsigned int i;
+
+	for (i = 0; i < a->next_idx; i++)
+		vfree(a->chunk_base_addr[i]);
+	vfree(a->chunk_base_addr);
+
+	a->chunk_base_addr = NULL;
+	a->chunk_start = NULL;
+	a->chunk_cur = NULL;
+	a->next_idx = 0;
+}
+
+/*
+ * This is a callback for decompress_fn.
+ *
+ * It copies the partial decompressed content in [buf, buf + len) into the
+ * active chunk. If the active chunk is not large enough, it is filled up,
+ * retired, and a new chunk is activated to hold the remaining data.
+ *
+ * Returns the number of bytes stored (always @len on success), or -1 if
+ * @len exceeds the chunk size, the allocator state is inconsistent, or no
+ * further chunk can be allocated.
+ */
+static long flush(void *buf, unsigned long len)
+{
+	struct decompress_mem_allocator *a = &dcmpr_allocator;
+	unsigned long room;
+	long copied = 0;
+
+	if (unlikely(len > a->chunk_size)) {
+		pr_info("Chunk size is too small to hold decompressed data\n");
+		return -1;
+	}
+
+	/*
+	 * A write position beyond the end of the active chunk means the
+	 * bookkeeping is corrupted. Warn and fail this decompression; that
+	 * is recoverable, so there is no reason to bring the kernel down.
+	 */
+	if (WARN_ON_ONCE(a->chunk_cur > a->chunk_start + a->chunk_size))
+		return -1;
+
+	room = a->chunk_start + a->chunk_size - a->chunk_cur;
+	if (room < len) {
+		/* Top up the active chunk, then continue in a fresh one */
+		memcpy(a->chunk_cur, buf, room);
+		copied += room;
+		a->chunk_cur += room;
+		buf += room;
+		len -= room;
+		/*
+		 * allocate_chunk_memory() activates the new chunk itself; on
+		 * failure the allocator keeps pointing at the full chunk.
+		 */
+		if (unlikely(!allocate_chunk_memory())) {
+			pr_info("Decompression runs out of memory\n");
+			return -1;
+		}
+	}
+	memcpy(a->chunk_cur, buf, len);
+	copied += len;
+	a->chunk_cur += len;
+	return copied;
+}
+
+/*
+ * Decompress @image_gz_sz bytes at @compressed_data with whichever
+ * in-kernel decompressor recognizes the data.
+ *
+ * The output is collected through flush() into the global chunk
+ * allocator and then merged into a single buffer.
+ *
+ * @out_buf / @out_sz: set only on success; the caller then owns the
+ *                     buffer and releases it with vfree().
+ * @ctx:               currently unused.
+ *
+ * Returns 0 on success, -EINVAL for a non-positive size, an unrecognized
+ * format or a decompression failure, and -ENOMEM if memory runs out.
+ */
+static int parser_cmd_decompress(char *compressed_data, int image_gz_sz,
+		char **out_buf, unsigned long *out_sz, struct kexec_context *ctx)
+{
+	struct decompress_mem_allocator *a = &dcmpr_allocator;
+	decompress_fn decompressor;
+	const char *name;
+	int ret;
+
+	/* The caller passes an unsigned 32-bit length; it may not fit an int */
+	if (image_gz_sz <= 0)
+		return -EINVAL;
+
+	/* Identify the format first so nothing needs freeing if it is unknown */
+	decompressor = decompress_method(compressed_data, image_gz_sz, &name);
+	if (!decompressor) {
+		pr_err("Can not find decompress method\n");
+		return -EINVAL;
+	}
+	pr_debug("Find decompressing method: %s, compressed sz:0x%x\n",
+		 name, image_gz_sz);
+
+	ret = decompress_mem_allocator_init(a, CHUNK_SIZE);
+	if (ret < 0)
+		return ret;
+
+	/* Any non-zero result from the decompressor is reported as -EINVAL */
+	if (decompressor(compressed_data, image_gz_sz, NULL, flush,
+			 NULL, NULL, NULL))
+		ret = -EINVAL;
+	else
+		ret = merge_decompressed_data(a, out_buf, out_sz);
+
+	decompress_mem_allocator_fini(a);
+
+	return ret;
+}
+
static int kexec_buff_parser(struct bpf_parser_context *parser)
{
+	struct bpf_parser_buf *pbuf = parser->buf;
+	struct kexec_context *ctx = (struct kexec_context *)parser->data;
+	struct cmd_hdr *cmd = (struct cmd_hdr *)pbuf->buf;
+	char *decompressed_buf, *buf;
+	unsigned long decompressed_sz;
+	int ret = 0;
+
+	/*
+	 * The buffer holds one command: a struct cmd_hdr followed by
+	 * payload_len bytes of payload. The header must fit in the buffer
+	 * before any of its fields can be trusted.
+	 */
+	if (pbuf->size < sizeof(struct cmd_hdr))
+		return -EINVAL;
+	/* Written as a subtraction so the check cannot wrap around */
+	if (cmd->payload_len > pbuf->size - sizeof(struct cmd_hdr)) {
+		pr_info("Invalid payload size:0x%x, while buffer size:0x%x\n",
+			cmd->payload_len, pbuf->size);
+		return -EINVAL;
+	}
+	buf = pbuf->buf + sizeof(struct cmd_hdr);
+
+	switch (cmd->cmd) {
+	case KEXEC_BPF_CMD_DONE:
+		ctx->parsed = true;
+		break;
+	case KEXEC_BPF_CMD_DECOMPRESS:
+		ret = parser_cmd_decompress(buf, cmd->payload_len, &decompressed_buf,
+					    &decompressed_sz, ctx);
+		if (ret)
+			break;
+		switch (cmd->subcmd) {
+		case KEXEC_BPF_SUBCMD_KERNEL:
+			/* The decompressed image replaces the current kernel buffer */
+			vfree(ctx->kernel);
+			ctx->kernel = decompressed_buf;
+			ctx->kernel_sz = decompressed_sz;
+			break;
+		default:
+			/* No consumer for this sub-command here: drop the result */
+			vfree(decompressed_buf);
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
-	return 0;
+	/* Report a failed decompression instead of always claiming success */
+	return ret;
}
--
2.49.0
next prev parent reply other threads:[~2026-03-22 1:45 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-22 1:43 [PATCHv7 00/13] kexec: Use BPF lskel to enable kexec to load PE format boot image Pingfan Liu
2026-03-22 1:43 ` [PATCHv7 01/13] bpf: Introduce kfuncs to parser buffer content Pingfan Liu
2026-03-22 2:14 ` bot+bpf-ci
2026-03-22 1:43 ` [PATCHv7 02/13] kexec_file: Use bpf-prog to decompose image Pingfan Liu
2026-03-22 2:14 ` bot+bpf-ci
2026-03-22 1:43 ` [PATCHv7 03/13] lib/decompress: Keep decompressor when CONFIG_KEEP_DECOMPRESSOR Pingfan Liu
2026-03-22 2:26 ` bot+bpf-ci
2026-03-22 1:43 ` Pingfan Liu [this message]
2026-03-22 2:26 ` [PATCHv7 04/13] kexec_file: Implement decompress method for parser bot+bpf-ci
2026-03-22 1:43 ` [PATCHv7 05/13] kexec_file: Implement copy " Pingfan Liu
2026-03-22 1:43 ` [PATCHv7 06/13] kexec_file: Chain the stages into a pipeline Pingfan Liu
2026-03-22 2:14 ` bot+bpf-ci
2026-03-22 1:43 ` [PATCHv7 07/13] kexec_file: Introduce a bpf-prog lskel to run a format parser Pingfan Liu
2026-03-22 1:43 ` [PATCHv7 08/13] kexec_file: Factor out routine to find a symbol in ELF Pingfan Liu
2026-03-22 2:14 ` bot+bpf-ci
2026-03-23 7:07 ` kernel test robot
2026-03-23 10:19 ` kernel test robot
2026-03-22 1:43 ` [PATCHv7 09/13] kexec_file: Integrate bpf light skeleton to load image with bpf-prog Pingfan Liu
2026-03-22 2:14 ` bot+bpf-ci
2026-03-22 1:43 ` [PATCHv7 10/13] arm64/kexec: Select KEXEC_BPF to support UEFI-style kernel image Pingfan Liu
2026-03-22 1:44 ` [PATCHv7 11/13] tools/kexec: Introduce a bpf-prog to handle zboot image Pingfan Liu
2026-03-22 2:26 ` bot+bpf-ci
2026-03-22 1:44 ` [PATCHv7 12/13] tools/kexec: Introduce a bpf-prog to handle UKI image Pingfan Liu
2026-03-22 1:44 ` [PATCHv7 13/13] tools/kexec: Introduce a tool to build zboot envelop Pingfan Liu
2026-03-23 19:02 ` [PATCHv7 00/13] kexec: Use BPF lskel to enable kexec to load PE format boot image Andrew Morton
2026-03-24 7:09 ` Pingfan Liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260322014402.8815-5-piliu@redhat.com \
--to=piliu@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=andrii@kernel.org \
--cc=ardb@kernel.org \
--cc=ast@kernel.org \
--cc=bhe@redhat.com \
--cc=bpf@vger.kernel.org \
--cc=catalin.marinas@arm.com \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=dyoung@redhat.com \
--cc=eddyz87@gmail.com \
--cc=horms@kernel.org \
--cc=jeremy.linton@arm.com \
--cc=john.fastabend@gmail.com \
--cc=kernel@jfarr.cc \
--cc=kexec@lists.infradead.org \
--cc=kraxel@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=martin.lau@linux.dev \
--cc=prudo@redhat.com \
--cc=song@kernel.org \
--cc=systemd-devel@lists.freedesktop.org \
--cc=vkuznets@redhat.com \
--cc=vmalik@redhat.com \
--cc=will@kernel.org \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox