The Linux Kernel Mailing List
 help / color / mirror / Atom feed
From: Pasha Tatashin <pasha.tatashin@soleen.com>
To: Michal Clapinski <mclapinski@google.com>
Cc: Kees Cook <kees@kernel.org>, Tony Luck <tony.luck@intel.com>,
	 "Guilherme G. Piccoli" <gpiccoli@igalia.com>,
	Pasha Tatashin <pasha.tatashin@soleen.com>,
	 Mike Rapoport <rppt@kernel.org>,
	Pratyush Yadav <pratyush@kernel.org>,
	 Alexander Graf <graf@amazon.com>,
	linux-kernel@vger.kernel.org, kexec@lists.infradead.org
Subject: Re: [PATCH v2] pstore: add a KHO backend
Date: Thu, 11 Jun 2026 18:55:08 +0000	[thread overview]
Message-ID: <air8jVtmUahab9hd@plex> (raw)
In-Reply-To: <20260605121040.1177072-1-mclapinski@google.com>

On 06-05 14:10, Michal Clapinski wrote:
> Up to this point to preserve late shutdown logs in memory, users had to
> predefine a memory region using ramoops. This commit changes this by
> preserving a buffer using kexec-handover.
> 
> pstore_kho supports preserving only 1 dmesg buffer.
> It gets replaced with the new buffer on every kexec, so the user has to
> copy the file out of pstore after every kexec.
> There is no erase() support.
> 
> Signed-off-by: Michal Clapinski <mclapinski@google.com>
> ---
> v2:
> - Added a comment explaining the benefits of pstore_kho.
> - Created include/linux/kho/abi/pstore.h.
> - Got rid of the KHO subtree.
> - Made sure never to free incoming kho data.
>   This way the module can be safely reloaded.
> - Sashiko complained that I trust the data coming from the old kernel.
>   I ignored it. LMK if I shouldn't trust the old kernel.
> ---
>  fs/pstore/Kconfig              |  10 ++
>  fs/pstore/Makefile             |   2 +
>  fs/pstore/pstore_kho.c         | 230 +++++++++++++++++++++++++++++++++
>  include/linux/kho/abi/pstore.h |  27 ++++
>  4 files changed, 269 insertions(+)
>  create mode 100644 fs/pstore/pstore_kho.c
>  create mode 100644 include/linux/kho/abi/pstore.h
> 
> diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
> index 3acc38600cd1..455790fec955 100644
> --- a/fs/pstore/Kconfig
> +++ b/fs/pstore/Kconfig
> @@ -81,6 +81,16 @@ config PSTORE_RAM
>  
>  	  For more information, see Documentation/admin-guide/ramoops.rst.
>  
> +config PSTORE_KHO
> +	tristate "Preserve logs over kexec"
> +	depends on PSTORE
> +	depends on KEXEC_HANDOVER
> +	help
> +	  A pstore backend for preserving dmesg over KHO (kexec handover).
> +	  It does not require any additional cmdline params to work.
> +
> +	  It supports preservation of only 1 dmesg file.
> +
>  config PSTORE_ZONE
>  	tristate
>  	depends on PSTORE
> diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile
> index c270467aeece..518cd408bf8e 100644
> --- a/fs/pstore/Makefile
> +++ b/fs/pstore/Makefile
> @@ -13,6 +13,8 @@ pstore-$(CONFIG_PSTORE_PMSG)	+= pmsg.o
>  ramoops-objs += ram.o ram_core.o
>  obj-$(CONFIG_PSTORE_RAM)	+= ramoops.o
>  
> +obj-$(CONFIG_PSTORE_KHO)	+= pstore_kho.o
> +
>  pstore_zone-objs += zone.o
>  obj-$(CONFIG_PSTORE_ZONE)	+= pstore_zone.o
>  
> diff --git a/fs/pstore/pstore_kho.c b/fs/pstore/pstore_kho.c
> new file mode 100644
> index 000000000000..6d4187d91642
> --- /dev/null
> +++ b/fs/pstore/pstore_kho.c
> @@ -0,0 +1,230 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * KHO (Kexec Handover) backend for pstore.
No need to spelll it out.
> + *
> + * KHO-based pstore provides a mechanism to hand over pstore data (specifically
> + * dmesg logs) from one kernel to another across a kexec reboot using the
> + * Kexec Handover (KHO) framework.
> + *
> + * Key advantages of KHO-based pstore include:
> + * - No hardcoded memmap: Unlike ramoops, it does not require reserving a static
> + *   memory region in the bootloader or device tree. Memory is allocated
> + *   dynamically and handed over to the next kernel.
> + * - Firmware independence: It does not rely on platform firmware support (like
> + *   ACPI ERST or UEFI variable storage) to preserve logs across reboots.
> + * - High throughput: It avoids the performance bottlenecks of serial consoles,
> + *   not being limited by console baud rates.
> + * - Complete log preservation: It preserves all dmesg logs, including those
> + *   generated late in the reboot cycle after filesystems have been unmounted,
> + *   up to the point of the kexec jump.
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/kho/abi/pstore.h>
> +#include <linux/init.h>
> +#include <linux/io.h>
> +#include <linux/kexec_handover.h>
> +#include <linux/module.h>
> +#include <linux/pstore.h>
> +#include <linux/slab.h>
> +#include <linux/string.h>
> +
> +/*
> + * The in and out buffers are separate and they need not be the same size.
> + * Therefore, this is not part of ABI.
> + */
> +#define RECORD_MAX_SIZE		(1 << CONFIG_LOG_BUF_SHIFT)

This does not sound right. I think, we should enforce size through 
ABI. Or make it flexible so 

> +
> +struct pstore_kho_context {
> +	struct pstore_info pstore;
> +	bool read_done;
> +};
> +
> +static struct pstore_ser *kho_ser_in;
> +static struct pstore_ser *kho_ser_out;
> +
> +static int pstore_kho_open(struct pstore_info *psi)
> +{
> +	struct pstore_kho_context *cxt = psi->data;
> +
> +	cxt->read_done = false;
> +	return 0;
> +}
> +
> +static ssize_t pstore_kho_read(struct pstore_record *record)
> +{
> +	struct pstore_kho_context *cxt = record->psi->data;
> +	struct pstore_kho_record *kho_data_in;
> +
> +	if (cxt->read_done || !kho_ser_in)
> +		return 0;
> +
> +	cxt->read_done = true;
> +	kho_data_in = &kho_ser_in->record;
> +
> +	record->buf = kmemdup(kho_data_in->buf, kho_data_in->size, GFP_KERNEL);
> +	if (!record->buf)
> +		return -ENOMEM;
> +
> +	record->type = PSTORE_TYPE_DMESG;
> +	record->id = 0;
> +	record->size = kho_data_in->size;
> +	record->time.tv_sec = kho_data_in->time_sec;
> +	record->time.tv_nsec = kho_data_in->time_nsec;
> +	record->count = kho_data_in->count;
> +	record->reason = kho_data_in->reason;
> +	record->part = kho_data_in->part;
> +	record->compressed = kho_data_in->compressed;
> +
> +	return record->size;
> +}
> +
> +static int pstore_kho_write(struct pstore_record *record)
> +{
> +	struct pstore_kho_record *kho_data_out = &kho_ser_out->record;
> +
> +	if (record->type != PSTORE_TYPE_DMESG)
> +		return -EINVAL;
> +
> +	if (kho_data_out->size != 0) {
> +		pr_err("pstore kho already contains a record\n");
> +		return -ENOSPC;
> +	}
> +
> +	if (record->size > RECORD_MAX_SIZE) {
> +		pr_err("dmesg record too big, record size: %lu, available space: %d\n",
> +		       record->size, RECORD_MAX_SIZE);
> +		return -ENOSPC;
> +	}
> +
> +	memcpy(kho_data_out->buf, record->buf, record->size);
> +	kho_data_out->size = record->size;
> +	kho_data_out->time_sec = record->time.tv_sec;
> +	kho_data_out->time_nsec = record->time.tv_nsec;
> +	kho_data_out->count = record->count;
> +	kho_data_out->reason = record->reason;
> +	kho_data_out->part = record->part;
> +	kho_data_out->compressed = record->compressed;
> +
> +	return 0;
> +}
> +
> +static struct pstore_kho_context pstore_kho_cxt = {
> +	.pstore = {
> +		.owner		= THIS_MODULE,
> +		.name		= "kho",
> +		.bufsize	= RECORD_MAX_SIZE,

Let's make this ABI for simplicity.

> +		.flags		= PSTORE_FLAGS_DMESG,
> +		.max_reason	= KMSG_DUMP_SHUTDOWN,

In all other places, the default is KMSG_DUMP_OOPS, and it is increased 
or decreased based on user-provided parameters. Should we not do the 
same here?

> +		.open		= pstore_kho_open,
> +		.read		= pstore_kho_read,
> +		.write		= pstore_kho_write,
> +	},
> +};
> +
> +static void __init kho_setup_incoming(void)
> +{
> +	phys_addr_t kho_ser_phys;
> +	int err;
> +
> +	err = kho_retrieve_subtree(KHO_PSTORE_FDT_NAME, &kho_ser_phys);
> +	if (err) {
> +		if (err != -ENOENT)
> +			pr_err("failed to retrieve KHO data %s: %d\n",
> +			       KHO_PSTORE_FDT_NAME, err);
> +		return;
> +	}
> +
> +	kho_ser_in = phys_to_virt(kho_ser_phys);
> +
> +	if (kho_ser_in->version != KHO_PSTORE_VERSION) {
> +		pr_err("unsupported KHO pstore version: %d\n", kho_ser_in->version);
> +		kho_ser_in = NULL;
> +		return;
> +	}
> +
> +	pr_info("successfully restored preserved data\n");
> +}
> +
> +static int __init kho_setup_outgoing(void)
> +{
> +	int err;
> +	size_t total_size = sizeof(struct pstore_ser) + RECORD_MAX_SIZE;

Please use the reverse-christmas-tree order for variable declarations.

> +
> +	kho_ser_out = kho_alloc_preserve(total_size);

RECORD_MAX_SIZE is not part of the ABI, yet it is statically configured 
during kho_setup_outgoing(). We need to either make it dynamic, setting 
up preserved pages as we go based on the amount of used memory (i.e use 
something like KHO linked-blocks), or make this part of the ABI.

> +	if (IS_ERR(kho_ser_out)) {
> +		pr_err("failed to allocate pstore kho ser anchor\n");
> +		return PTR_ERR(kho_ser_out);
> +	}
> +	memset(kho_ser_out, 0, total_size);
> +	kho_ser_out->version = KHO_PSTORE_VERSION;
> +
> +	err = kho_add_subtree(KHO_PSTORE_FDT_NAME, kho_ser_out);
> +	if (err) {
> +		pr_err("failed to add KHO data\n");
> +		goto err_free_ser;
> +	}
> +
> +	return 0;
> +
> +err_free_ser:
> +	kho_unpreserve_free(kho_ser_out);
> +	return err;
> +}
> +
> +static int __init pstore_kho_init(void)
> +{
> +	int err;
> +	struct pstore_kho_context *cxt = &pstore_kho_cxt;

RCT order please.

> +
> +	if (!kho_is_enabled()) {
> +		pr_info("KHO is disabled, pstore_kho cannot start\n");
> +		return -ENODEV;
> +	}
> +
> +	kho_setup_incoming();
> +	err = kho_setup_outgoing();
> +	if (err) {
> +		pr_err("failed to setup outgoing KHO\n");
> +		return err;

Although the outgoing failed, can we still retrieve incoming messages?

> +	}
> +
> +	cxt->pstore.data = cxt;
> +	cxt->pstore.buf = kmalloc(cxt->pstore.bufsize, GFP_KERNEL);
> +	if (!cxt->pstore.buf) {
> +		err = -ENOMEM;
> +		goto err_free_outgoing;
> +	}
> +
> +	err = pstore_register(&cxt->pstore);
> +	if (err) {
> +		pr_err("failed to register with pstore\n");
> +		goto err_free_pstore_buf;
> +	}
> +
> +	return 0;
> +
> +err_free_pstore_buf:
> +	kfree(cxt->pstore.buf);
> +
> +err_free_outgoing:
> +	kho_remove_subtree(kho_ser_out);
> +	kho_unpreserve_free(kho_ser_out);
> +
> +	return err;
> +}
> +module_init(pstore_kho_init);
> +
> +static void __exit pstore_kho_exit(void)
> +{
> +	pstore_unregister(&pstore_kho_cxt.pstore);
> +	kfree(pstore_kho_cxt.pstore.buf);
> +
> +	kho_remove_subtree(kho_ser_out);
> +	kho_unpreserve_free(kho_ser_out);
> +}
> +module_exit(pstore_kho_exit);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_DESCRIPTION("Pstore backend for dmesg preservation over kexec");
> diff --git a/include/linux/kho/abi/pstore.h b/include/linux/kho/abi/pstore.h
> new file mode 100644
> index 000000000000..743ec64d67fc
> --- /dev/null
> +++ b/include/linux/kho/abi/pstore.h
> @@ -0,0 +1,27 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef _LINUX_KHO_ABI_PSTORE_H
> +#define _LINUX_KHO_ABI_PSTORE_H
> +
> +#include <linux/types.h>

Please use the header comment in other ABI files as a template for what 
should be stated here. Please also include it in the documentation, 
consistent with all other ABI headers.

> +
> +#define KHO_PSTORE_FDT_NAME	"pstore-kho"
> +#define KHO_PSTORE_VERSION	1
> +
> +struct pstore_kho_record {

I would prefer: pstore_record_ser

> +	s64			size;
> +	s64			time_sec;
> +	u32			time_nsec;
> +	s32			count;
> +	u32			reason;
> +	u32			part;
> +	u32			compressed;
> +	char			buf[];
> +};
> +
> +struct pstore_ser {
> +	u32			version;
> +	struct pstore_kho_record record;
> +};
> +
> +#endif /* _LINUX_KHO_ABI_PSTORE_H */
> -- 
> 2.54.0.1032.g2f8565e1d1-goog
> 

      parent reply	other threads:[~2026-06-11 18:55 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-05 12:10 [PATCH v2] pstore: add a KHO backend Michal Clapinski
2026-06-10 20:34 ` Kees Cook
2026-06-11  9:18   ` Pratyush Yadav
2026-06-11 18:55 ` Pasha Tatashin [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=air8jVtmUahab9hd@plex \
    --to=pasha.tatashin@soleen.com \
    --cc=gpiccoli@igalia.com \
    --cc=graf@amazon.com \
    --cc=kees@kernel.org \
    --cc=kexec@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mclapinski@google.com \
    --cc=pratyush@kernel.org \
    --cc=rppt@kernel.org \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox