qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Aditya Gupta <adityag@linux.ibm.com>
To: Nicholas Piggin <npiggin@gmail.com>, qemu-devel@nongnu.org
Cc: qemu-ppc@nongnu.org,
	Daniel Henrique Barboza <danielhb413@gmail.com>,
	Harsh Prateek Bora <harshpb@linux.ibm.com>,
	Sourabh Jain <sourabhjain@linux.ibm.com>,
	Mahesh J Salgaonkar <mahesh@linux.ibm.com>,
	Hari Bathini <hbathini@linux.ibm.com>
Subject: Re: [PATCH 4/6] hw/ppc: Implement saving CPU state in Fadump
Date: Thu, 27 Feb 2025 12:31:02 +0530	[thread overview]
Message-ID: <dec641b5-6baa-49f3-993d-2bb77694bcbc@linux.ibm.com> (raw)
In-Reply-To: <D82WPYZ2R1DS.2JC91G6HRY0U7@gmail.com>

On 27/02/25 08:57, Nicholas Piggin wrote:

> On Mon Feb 17, 2025 at 5:17 PM AEST, Aditya Gupta wrote:
>> <...snip...>
>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>> index 9b29cadab2c9..0aca4270aee8 100644
>> <...snip...>
>> --- a/hw/ppc/spapr_rtas.c
>> +++ b/hw/ppc/spapr_rtas.c
>> @@ -413,9 +416,174 @@ static bool fadump_preserve_mem(void)
>>           }
>>   
>>           switch (data_type) {
>> -        case FADUMP_CPU_STATE_DATA:
>> -            /* TODO: Add CPU state data */
>> +        case FADUMP_CPU_STATE_DATA: {
> I would split these out into their own functions if they grow more than
> a few lines.
Makes sense. Will add this into a new helper function.
>
>> +            struct rtas_fadump_reg_save_area_header reg_save_hdr;
>> +            struct rtas_fadump_reg_entry **reg_entries;
>> +            struct rtas_fadump_reg_entry *curr_reg_entry;
>> +
>> +            uint32_t fadump_reg_entries_size;
>> +            __be32 num_cpus = 0;
>> +            uint32_t num_regs_per_cpu = 0;
>> +            CPUState *cpu;
>> +            CPUPPCState *env;
>> +            PowerPCCPU *ppc_cpu;
>> +
>> +            CPU_FOREACH(cpu) {
>> +                ++num_cpus;
>> +            }
>> +
>> +            reg_save_hdr.version = cpu_to_be32(1);
>> +            reg_save_hdr.magic_number =
>> +                cpu_to_be64(fadump_str_to_u64("REGSAVE"));
>> +
>> +            /* Reg save area header is immediately followed by num cpus */
>> +            reg_save_hdr.num_cpu_offset =
>> +                cpu_to_be32(sizeof(struct rtas_fadump_reg_save_area_header));
>> +
>> +            fadump_reg_entries_size = num_cpus *
>> +                                      FADUMP_NUM_PER_CPU_REGS *
>> +                                      sizeof(struct rtas_fadump_reg_entry);
>> +
>> +            reg_entries = malloc(fadump_reg_entries_size);
>> +            curr_reg_entry = (struct rtas_fadump_reg_entry *)reg_entries;
>> +
>> +            /* This must loop num_cpus time */
>> +            CPU_FOREACH(cpu) {
>> +                ppc_cpu = POWERPC_CPU(cpu);
>> +                env = cpu_env(cpu);
>> +                num_regs_per_cpu = 0;
>> +
>> +                curr_reg_entry->reg_id =
>> +                    cpu_to_be64(fadump_str_to_u64("CPUSTRT"));
>> +                curr_reg_entry->reg_value = ppc_cpu->vcpu_id;
>> +                ++curr_reg_entry;
>> +
>> +#define REG_ENTRY(id, val)                                     \
>> +                do {                                           \
>> +                    curr_reg_entry->reg_id =                   \
>> +                        cpu_to_be64(fadump_str_to_u64(#id));   \
>> +                    curr_reg_entry->reg_value = val;           \
>> +                    ++curr_reg_entry;                          \
>> +                    ++num_regs_per_cpu;                        \
>> +                } while (0)
>> +
>> +                REG_ENTRY(ACOP, env->spr[SPR_ACOP]);
>> +                REG_ENTRY(AMR, env->spr[SPR_AMR]);
>> +                REG_ENTRY(BESCR, env->spr[SPR_BESCR]);
>> +                REG_ENTRY(CFAR, env->spr[SPR_CFAR]);
>> +                REG_ENTRY(CIABR, env->spr[SPR_CIABR]);
>> +
>> +                /* Save the condition register */
>> +                uint64_t cr = 0;
>> +                cr |= (env->crf[0] & 0xf);
>> +                cr |= (env->crf[1] & 0xf) << 1;
>> +                cr |= (env->crf[2] & 0xf) << 2;
>> +                cr |= (env->crf[3] & 0xf) << 3;
>> +                cr |= (env->crf[4] & 0xf) << 4;
>> +                cr |= (env->crf[5] & 0xf) << 5;
>> +                cr |= (env->crf[6] & 0xf) << 6;
>> +                cr |= (env->crf[7] & 0xf) << 7;
> Shift values wrong here I think... Use ppc_get_cr()
Okay, I had some issues getting this CR. Will use 'ppc_get_cr', thanks !
>
>> +                REG_ENTRY(CR, cr);
>> +
>> +                REG_ENTRY(CTR, env->spr[SPR_CTR]);
>> +                REG_ENTRY(CTRL, env->spr[SPR_CTRL]);
>> +                REG_ENTRY(DABR, env->spr[SPR_DABR]);
>> +                REG_ENTRY(DABRX, env->spr[SPR_DABRX]);
>> +                REG_ENTRY(DAR, env->spr[SPR_DAR]);
>> +                REG_ENTRY(DAWR0, env->spr[SPR_DAWR0]);
>> +                REG_ENTRY(DAWR1, env->spr[SPR_DAWR1]);
>> +                REG_ENTRY(DAWRX0, env->spr[SPR_DAWRX0]);
>> +                REG_ENTRY(DAWRX1, env->spr[SPR_DAWRX1]);
>> +                REG_ENTRY(DPDES, env->spr[SPR_DPDES]);
>> +                REG_ENTRY(DSCR, env->spr[SPR_DSCR]);
>> +                REG_ENTRY(DSISR, env->spr[SPR_DSISR]);
>> +                REG_ENTRY(EBBHR, env->spr[SPR_EBBHR]);
>> +                REG_ENTRY(EBBRR, env->spr[SPR_EBBRR]);
>> +
>> +                REG_ENTRY(FPSCR, env->fpscr);
>> +                REG_ENTRY(FSCR, env->spr[SPR_FSCR]);
>> +
>> +                /* Save the GPRs */
>> +                for (int gpr_id = 0; gpr_id < 32; ++gpr_id) {
>> +                    curr_reg_entry->reg_id =
>> +                        cpu_to_be64(fadump_gpr_id_to_u64(gpr_id));
>> +                    curr_reg_entry->reg_value = env->gpr[i];
>> +                    ++curr_reg_entry;
>> +                    ++num_regs_per_cpu;
>> +                }
>> +
>> +                REG_ENTRY(IAMR, env->spr[SPR_IAMR]);
>> +                REG_ENTRY(IC, env->spr[SPR_IC]);
>> +                REG_ENTRY(LR, env->spr[SPR_LR]);
>> +
>> +                REG_ENTRY(MSR, env->msr);
>> +                REG_ENTRY(NIA, env->nip);   /* NIA */
>> +                REG_ENTRY(PIR, env->spr[SPR_PIR]);
>> +                REG_ENTRY(PSPB, env->spr[SPR_PSPB]);
>> +                REG_ENTRY(PVR, env->spr[SPR_PVR]);
>> +                REG_ENTRY(RPR, env->spr[SPR_RPR]);
>> +                REG_ENTRY(SPURR, env->spr[SPR_SPURR]);
>> +                REG_ENTRY(SRR0, env->spr[SPR_SRR0]);
>> +                REG_ENTRY(SRR1, env->spr[SPR_SRR1]);
>> +                REG_ENTRY(TAR, env->spr[SPR_TAR]);
>> +                REG_ENTRY(TEXASR, env->spr[SPR_TEXASR]);
>> +                REG_ENTRY(TFHAR, env->spr[SPR_TFHAR]);
>> +                REG_ENTRY(TFIAR, env->spr[SPR_TFIAR]);
>> +                REG_ENTRY(TIR, env->spr[SPR_TIR]);
>> +                REG_ENTRY(UAMOR, env->spr[SPR_UAMOR]);
>> +                REG_ENTRY(VRSAVE, env->spr[SPR_VRSAVE]);
>> +                REG_ENTRY(VSCR, env->vscr);
>> +                REG_ENTRY(VTB, env->spr[SPR_VTB]);
>> +                REG_ENTRY(WORT, env->spr[SPR_WORT]);
>> +                REG_ENTRY(XER, env->spr[SPR_XER]);
>> +
>> +                /*
>> +                 * Ignoring transaction checkpoint and few other registers
>> +                 * mentioned in PAPR as not supported in QEMU
>> +                 */
>> +#undef REG_ENTRY
>> +
>> +                /* End the registers for this CPU with "CPUEND" reg entry */
>> +                curr_reg_entry->reg_id =
>> +                    cpu_to_be64(fadump_str_to_u64("CPUEND"));
>> +
>> +                /* Ensure the number of registers match (+2 for STRT & END) */
>> +                assert(FADUMP_NUM_PER_CPU_REGS == num_regs_per_cpu + 2);
>> +
>> +                ++curr_reg_entry;
>> +            }
>> +
>> +            cpu_state_len = 0;
>> +            cpu_state_len += sizeof(reg_save_hdr);     /* reg save header */
>> +            cpu_state_len += sizeof(__be32);           /* num_cpus */
>> +            cpu_state_len += fadump_reg_entries_size;  /* reg entries */
>> +
>> +            cpu_state_region = &fdm->rgn[i];
>> +            cpu_state_addr = dest_addr;
>> +            cpu_state_buffer = g_malloc(cpu_state_len);
>> +
>> +            uint64_t offset = 0;
>> +            memcpy(cpu_state_buffer + offset,
>> +                    &reg_save_hdr, sizeof(reg_save_hdr));
>> +            offset += sizeof(reg_save_hdr);
>> +
>> +            /* Write num_cpus */
>> +            num_cpus = cpu_to_be32(num_cpus);
>> +            memcpy(cpu_state_buffer + offset, &num_cpus, sizeof(__be32));
>> +            offset += sizeof(__be32);
>> +
>> +            /* Write the register entries */
>> +            memcpy(cpu_state_buffer + offset,
>> +                    reg_entries, fadump_reg_entries_size);
>> +            offset += fadump_reg_entries_size;
>> +
>> +            /*
>> +             * We will write the cpu state data later, as otherwise it
>> +             * might get overwritten by other fadump regions
>> +             */
>> +
>>               break;
>> +        }
>>           case FADUMP_HPTE_REGION:
>>               /* TODO: Add hpte state data */
>>               break;
>> @@ -455,6 +623,34 @@ static bool fadump_preserve_mem(void)
>>           }
>>       }
>>   
>> +    /*
>> +     * Write the Register Save Area
>> +     *
>> +     * CPU State/Register Save Area should be written after dumping the
>> +     * memory to prevent overwritting while saving other memory regions
>> +     *
>> +     * eg. If boot memory region is 1G, then both the first 1GB memory, and
>> +     * the Register Save Area needs to be saved at 1GB.
>> +     * And as the CPU_STATE_DATA region comes first than the
>> +     * REAL_MODE_REGION region to be copied, the CPU_STATE_DATA will get
>> +     * overwritten if saved before the 0GB - 1GB region is copied after
>> +     * saving CPU state data
>> +     */
>> +    cpu_physical_memory_write(cpu_state_addr, cpu_state_buffer, cpu_state_len);
> Check docs/devel/loads-stores.rst, address_space_* is preferred to check
> for failures. It also says devices should operate on their own address
> spaces and that doesn't really apply to spapr since the "virtual
> hypervisor" doesn't really fit the model of a device...
>
> Perhaps look at h_enter_nested which uses CPU(cpu)->as.
Got it. Will try to use address_space_read/write in v2.
>> +    g_free(cpu_state_buffer);
>> +
>> +    /*
>> +     * Set bytes_dumped in cpu state region, so kernel knows platform have
>> +     * exported it
>> +     */
>> +    cpu_state_region->bytes_dumped = cpu_to_be64(cpu_state_len);
>> +
>> +    if (cpu_state_region->source_len != cpu_state_region->bytes_dumped) {
>> +        qemu_log_mask(LOG_GUEST_ERROR,
>> +                "CPU State region's length passed by kernel, doesn't match"
>> +                " with CPU State region length exported by QEMU");
>> +    }
>> +
>>       return true;
>>   }
>>   
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index a80704187583..0e8002bad9e0 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -792,6 +792,9 @@ void push_sregs_to_kvm_pr(SpaprMachineState *spapr);
>>   #define FADUMP_HPTE_REGION      0x0002
>>   #define FADUMP_REAL_MODE_REGION 0x0011
>>   
>> +/* Number of registers stored per cpu */
>> +#define FADUMP_NUM_PER_CPU_REGS (32 /*GPR*/ + 45 /*others*/ + 2 /*STRT & END*/)
>> +
>>   /* OS defined sections */
>>   #define FADUMP_PARAM_AREA       0x0100
>>   
>> @@ -845,6 +848,86 @@ struct rtas_fadump_mem_struct {
>>       struct rtas_fadump_section        rgn[FADUMP_MAX_SECTIONS];
>>   };
>>   
>> +/*
>> + * The firmware-assisted dump format.
>> + *
>> + * The register save area is an area in the partition's memory used to preserve
>> + * the register contents (CPU state data) for the active CPUs during a firmware
>> + * assisted dump. The dump format contains register save area header followed
>> + * by register entries. Each list of registers for a CPU starts with "CPUSTRT"
>> + * and ends with "CPUEND".
>> + */
>> +
>> +/* Register save area header. */
>> +struct rtas_fadump_reg_save_area_header {
>> +    __be64    magic_number;
>> +    __be32    version;
>> +    __be32    num_cpu_offset;
>> +};
>> +
>> +/* Register entry. */
>> +struct rtas_fadump_reg_entry {
>> +    __be64    reg_id;
>> +    __be64    reg_value;
>> +};
>> +
>> +/*
>> + * Copy the ascii values for first 8 characters from a string into u64
>> + * variable at their respective indexes.
>> + * e.g.
>> + *  The string "FADMPINF" will be converted into 0x4641444d50494e46
>> + */
>> +static inline uint64_t fadump_str_to_u64(const char *str)
>> +{
>> +    uint64_t val = 0;
>> +    int i;
>> +
>> +    for (i = 0; i < sizeof(val); i++) {
>> +        val = (*str) ? (val << 8) | *str++ : val << 8;
>> +    }
>> +    return val;
>> +}
>> +
>> +/**
>> + * Get the identifier id for register entries of GPRs
>> + *
>> + * It gives the same id as 'fadump_str_to_u64' when the complete string id
>> + * of the GPR is given, ie.
>> + *
>> + *   fadump_str_to_u64("GPR05") == fadump_gpr_id_to_u64(5);
>> + *   fadump_str_to_u64("GPR12") == fadump_gpr_id_to_u64(12);
>> + *
>> + * And so on. Hence this can be implemented by creating a dynamic
>> + * string for each GPR, such as "GPR00", "GPR01", ... "GPR31"
>> + * Instead of allocating a string, an observation from the math of
>> + * 'fadump_str_to_u64' or from PAPR tells us that there's a pattern
>> + * in the identifier IDs, such that the first 8 bytes are affected only by
>> + * whether it is GPR0*, GPR1*, GPR2*, GPR3*. 9th byte is always 0x3. And
>> + * the the 10th byte is the index of the GPR modulo 10.
>> + */
>> +static inline uint64_t fadump_gpr_id_to_u64(uint32_t gpr_id)
>> +{
>> +    uint64_t val = 0;
>> +
>> +    /* Valid range of GPR id is only GPR0 to GPR31 */
>> +    assert(gpr_id < 32);
>> +
>> +    if (gpr_id <= 9) {
>> +        val = fadump_str_to_u64("GPR0");
>> +    } else if (gpr_id <= 19) {
>> +        val = fadump_str_to_u64("GPR1");
>> +    } else if (gpr_id <= 29) {
>> +        val = fadump_str_to_u64("GPR2");
>> +    } else {
>> +        val = fadump_str_to_u64("GPR3");
>> +    }
>> +
>> +    val |= 0x30000000;
>> +    val |= ((gpr_id % 10) << 12);
>> +
>> +    return val;
>> +}
> These two functions could probably go out of line, I doubt they
> are performance critical and make them static if not used outside
> the file.

True, have marked them static (but in a header file), can see I can move 
it into some .c file if not needed to be shared.


Thanks for your reviews Nick.


- Aditya G

>
> Thanks,
> Nick


  reply	other threads:[~2025-02-27  7:01 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-17  7:17 [PATCH 0/6] Implement Firmware Assisted Dump for PSeries Aditya Gupta
2025-02-17  7:17 ` [PATCH 1/6] hw/ppc: Implement skeleton code for fadump in PSeries Aditya Gupta
2025-02-27  3:07   ` Nicholas Piggin
2025-02-27  6:49     ` Aditya Gupta
2025-02-27  8:48       ` Nicholas Piggin
2025-02-27 12:15         ` Aditya Gupta
2025-03-04  9:01   ` Harsh Prateek Bora
2025-03-06  4:08     ` Aditya Gupta
2025-02-17  7:17 ` [PATCH 2/6] hw/ppc: Trigger Fadump boot if fadump is registered Aditya Gupta
2025-02-27  3:14   ` Nicholas Piggin
2025-02-27  6:56     ` Aditya Gupta
2025-03-04  9:21   ` Harsh Prateek Bora
2025-03-06  4:11     ` Aditya Gupta
2025-02-17  7:17 ` [PATCH 3/6] hw/ppc: Preserve memory regions registered for fadump Aditya Gupta
2025-03-05  6:40   ` Harsh Prateek Bora
2025-03-06  4:16     ` Aditya Gupta
2025-02-17  7:17 ` [PATCH 4/6] hw/ppc: Implement saving CPU state in Fadump Aditya Gupta
2025-02-27  3:27   ` Nicholas Piggin
2025-02-27  7:01     ` Aditya Gupta [this message]
2025-03-05  7:23   ` Harsh Prateek Bora
2025-03-06  4:22     ` Aditya Gupta
2025-02-17  7:17 ` [PATCH 5/6] hw/ppc: Pass device tree properties for Fadump Aditya Gupta
2025-02-27  3:28   ` Nicholas Piggin
2025-02-27  7:02     ` Aditya Gupta
2025-03-05  7:34     ` Harsh Prateek Bora
2025-02-17  7:17 ` [PATCH 6/6] hw/ppc: Enable Fadump for PSeries Aditya Gupta
2025-02-27  3:33   ` Nicholas Piggin
2025-02-27  7:07     ` Aditya Gupta
2025-02-27  8:52       ` Nicholas Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=dec641b5-6baa-49f3-993d-2bb77694bcbc@linux.ibm.com \
    --to=adityag@linux.ibm.com \
    --cc=danielhb413@gmail.com \
    --cc=harshpb@linux.ibm.com \
    --cc=hbathini@linux.ibm.com \
    --cc=mahesh@linux.ibm.com \
    --cc=npiggin@gmail.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=sourabhjain@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).