All of lore.kernel.org
 help / color / mirror / Atom feed
From: Aditya Gupta <adityag@linux.ibm.com>
To: <qemu-devel@nongnu.org>
Cc: qemu-ppc@nongnu.org, "Nicholas Piggin" <npiggin@gmail.com>,
	"Frédéric Barrat" <fbarrat@linux.ibm.com>,
	"Sourabh Jain" <sourabhjain@linux.ibm.com>,
	"Mahesh J Salgaonkar" <mahesh@linux.ibm.com>,
	"Hari Bathini" <hbathini@linux.ibm.com>
Subject: [PATCH 7/7] hw/ppc: Implement MPIPL in PowerNV
Date: Mon, 17 Feb 2025 12:49:34 +0530	[thread overview]
Message-ID: <20250217071934.86131-8-adityag@linux.ibm.com> (raw)
In-Reply-To: <20250217071934.86131-1-adityag@linux.ibm.com>

Linux expects an "ibm,opal/dump" node to know whether MPIPL (aka fadump)
is supported on the hardware.

Export the "ibm,opal/dump" node in QEMU's device tree for Linux to know
that PowerNV supports MPIPL.

With this commit, the kernel boots believing fadump is supported, and
reserves memory regions for fadump if "fadump=on" is passed on the kernel
command line:

    Linux/PowerPC load: init=/bin/sh debug fadump=on
    Finalizing device tree... flat tree at 0x20ebaca0
    [    1.005765851,5] DUMP: Payload sent metadata tag : 0x800002a8
    [    1.005980914,5] DUMP: Boot mem size : 0x40000000
    [    0.000000] opal fadump: Kernel metadata addr: 800002a8
    [    0.000000] fadump: Reserved 1024MB of memory at 0x00000040000000 (System RAM: 20480MB)
    [    0.000000] fadump: Initialized 0x40000000 bytes cma area at 1024MB from 0x400102a8 bytes of memory reserved for firmware-assisted dump

Also, OPAL and Linux expect the "mpipl-boot" device tree property on an
MPIPL boot. Hence add the "mpipl-boot" property to the "ibm,opal/dump"
node in the device tree on an MPIPL boot.

As a result, after a crash, Linux knows when it's an MPIPL/fadump boot:

    [    0.000000] opal fadump: Firmware-assisted dump is active.
    [    0.000000] fadump: Firmware-assisted dump is active.
    [    0.000000] fadump: Reserving 23552MB of memory at 0x00000040000000 for preserving crash data

Note that booting with fadump on PowerNV seems to require more memory;
trying with 1GB causes this error from the kernel:

    [    0.000000] fadump: Failed to find memory chunk for reservation!

Even with anything from 2GB to 19GB, the kernel fails to boot due to
some memory issues.

Trying with >20GB memory is recommended for now.

Signed-off-by: Aditya Gupta <adityag@linux.ibm.com>
---
 hw/ppc/pnv.c             | 49 ++++++++++++++++++++++++++++++++++++++++
 hw/ppc/pnv_sbe.c         | 18 +++++++++++----
 include/hw/ppc/pnv_sbe.h |  4 ++++
 3 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 11fd477b71be..39ed3f873e9a 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -51,6 +51,7 @@
 #include "hw/ppc/pnv_chip.h"
 #include "hw/ppc/pnv_xscom.h"
 #include "hw/ppc/pnv_pnor.h"
+#include "hw/ppc/pnv_sbe.h"
 
 #include "hw/isa/isa.h"
 #include "hw/char/serial-isa.h"
@@ -697,6 +698,26 @@ static void *pnv_dt_create(MachineState *machine)
         pmc->dt_power_mgt(pnv, fdt);
     }
 
+    /* Add "dump" node so kernel knows MPIPL (aka fadump) is supported */
+    off = fdt_add_subnode(fdt, 0, "ibm,opal");
+    if (off == -FDT_ERR_EXISTS) {
+        off = fdt_path_offset(fdt, "/ibm,opal");
+    }
+
+    _FDT(off);
+    off = fdt_add_subnode(fdt, off, "dump");
+    _FDT(off);
+    _FDT((fdt_setprop_string(fdt, off, "compatible", "ibm,opal-dump")));
+
+    /* Add kernel and initrd as fw-load-area */
+    uint64_t fw_load_area[4] = {
+        cpu_to_be64(KERNEL_LOAD_ADDR), cpu_to_be64(KERNEL_MAX_SIZE),
+        cpu_to_be64(INITRD_LOAD_ADDR), cpu_to_be64(INITRD_MAX_SIZE)
+    };
+
+    _FDT((fdt_setprop(fdt, off, "fw-load-area",
+                    fw_load_area, sizeof(fw_load_area))));
+
     return fdt;
 }
 
@@ -714,6 +735,7 @@ static void pnv_reset(MachineState *machine, ResetType type)
     PnvMachineState *pnv = PNV_MACHINE(machine);
     IPMIBmc *bmc;
     void *fdt;
+    int node_offset;
 
     qemu_devices_reset(type);
 
@@ -744,6 +766,33 @@ static void pnv_reset(MachineState *machine, ResetType type)
         _FDT((fdt_pack(fdt)));
     }
 
+    /*
+     * If it's a MPIPL boot, add the "mpipl-boot" property, and reset the
+     * boolean for MPIPL boot for next boot
+     */
+    if (pnv_sbe_is_mpipl_boot()) {
+        void *fdt_copy = g_malloc0(FDT_MAX_SIZE);
+
+        /* Create a writable copy of the fdt */
+        _FDT((fdt_open_into(fdt, fdt_copy, FDT_MAX_SIZE)));
+
+        node_offset = fdt_path_offset(fdt_copy, "/ibm,opal/dump");
+        _FDT((fdt_appendprop_u64(fdt_copy, node_offset, "mpipl-boot", 1)));
+
+        /* Update the fdt, and free the original fdt */
+        if (fdt != machine->fdt) {
+            /*
+             * Only free the fdt if it's not machine->fdt, to prevent
+             * double free, since we already free machine->fdt later
+             */
+            g_free(fdt);
+        }
+        fdt = fdt_copy;
+
+        /* This boot is an MPIPL, reset the boolean for next boot */
+        pnv_sbe_reset_is_next_boot_mpipl();
+    }
+
     qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
     cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt));
 
diff --git a/hw/ppc/pnv_sbe.c b/hw/ppc/pnv_sbe.c
index 3b50667226b5..671dc81c9501 100644
--- a/hw/ppc/pnv_sbe.c
+++ b/hw/ppc/pnv_sbe.c
@@ -216,6 +216,18 @@ struct proc_dump_area {
     __be32  act_size;      /* Actual data size */
 } __packed;
 
+static bool is_next_boot_mpipl;
+
+bool pnv_sbe_is_mpipl_boot(void)
+{
+    return is_next_boot_mpipl;
+}
+
+void pnv_sbe_reset_is_next_boot_mpipl(void)
+{
+    is_next_boot_mpipl = false;
+}
+
 static void pnv_sbe_set_host_doorbell(PnvSBE *sbe, uint64_t val)
 {
     val &= SBE_HOST_RESPONSE_MASK; /* Is this right? What does HW do? */
@@ -334,10 +346,8 @@ static void pnv_sbe_power9_xscom_ctrl_write(void *opaque, hwaddr addr,
             /* Save processor state */
             pnv_mpipl_save_proc_regs();
 
-            /*
-             * TODO: Pass `mpipl` node in device tree to signify next
-             * boot is an MPIPL boot
-             */
+            /* Mark next boot as Memory-preserving boot */
+            is_next_boot_mpipl = true;
 
             /* Then do a guest reset */
             /*
diff --git a/include/hw/ppc/pnv_sbe.h b/include/hw/ppc/pnv_sbe.h
index f6cbcf990ed9..94bbdc7b6414 100644
--- a/include/hw/ppc/pnv_sbe.h
+++ b/include/hw/ppc/pnv_sbe.h
@@ -56,4 +56,8 @@ struct PnvSBEClass {
 /* Helper to access stashed SKIBOOT_BASE */
 bool pnv_sbe_mpipl_skiboot_base(void);
 
+/* Helpers to know if next boot is MPIPL boot */
+bool pnv_sbe_is_mpipl_boot(void);
+void pnv_sbe_reset_is_next_boot_mpipl(void);
+
 #endif /* PPC_PNV_SBE_H */
-- 
2.48.1



  parent reply	other threads:[~2025-02-17  7:21 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-17  7:19 [PATCH 0/7] Implement MPIPL for PowerNV Aditya Gupta
2025-02-17  7:19 ` [PATCH 1/7] hw/ppc: Log S0/S1 Interrupt triggers by OPAL Aditya Gupta
2025-03-11  4:38   ` Harsh Prateek Bora
2025-03-13 18:43     ` Aditya Gupta
2025-02-17  7:19 ` [PATCH 2/7] hw/ppc: Implement S0 SBE interrupt as cpu_pause then host reset Aditya Gupta
2025-03-11  4:45   ` Harsh Prateek Bora
2025-03-13 18:45     ` Aditya Gupta
2025-02-17  7:19 ` [PATCH 3/7] hw/ppc: Handle stash command in PowerNV SBE Aditya Gupta
2025-03-11  4:50   ` Harsh Prateek Bora
2025-03-13 18:46     ` Aditya Gupta
2025-02-17  7:19 ` [PATCH 4/7] hw/ppc: Add MDST/MDDT/MDRT table structures and offsets Aditya Gupta
2025-03-11  5:11   ` Harsh Prateek Bora
2025-03-13 18:50     ` Aditya Gupta
2025-02-17  7:19 ` [PATCH 5/7] hw/ppc: Preserve Memory Regions as per MDST/MDDT tables Aditya Gupta
2025-03-11  5:18   ` Harsh Prateek Bora
2025-03-13 18:54     ` Aditya Gupta
2025-02-17  7:19 ` [PATCH 6/7] hw/ppc: [WIP] Add Processor Dump Area offsets in Pnv SBE Aditya Gupta
2025-03-11  5:23   ` Harsh Prateek Bora
2025-03-13 18:56     ` Aditya Gupta
2025-02-17  7:19 ` Aditya Gupta [this message]
2025-03-11  5:41   ` [PATCH 7/7] hw/ppc: Implement MPIPL in PowerNV Harsh Prateek Bora
2025-03-13 19:00     ` Aditya Gupta
2025-02-27  3:37 ` [PATCH 0/7] Implement MPIPL for PowerNV Nicholas Piggin
2025-02-27  6:23   ` Aditya Gupta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250217071934.86131-8-adityag@linux.ibm.com \
    --to=adityag@linux.ibm.com \
    --cc=fbarrat@linux.ibm.com \
    --cc=hbathini@linux.ibm.com \
    --cc=mahesh@linux.ibm.com \
    --cc=npiggin@gmail.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=sourabhjain@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.